framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1,1,0,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1,2,0,0.012876799702644348
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1,4,0,0.012835200130939483
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1,4,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1,32,0,0.012569600343704223
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1,8,0,0.018750399351119995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1,16,0,0.01871200054883957
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1,32,0,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1,8,0,0.012583999335765839
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1,16,0,0.012591999769210816
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1,64,0,0.01863040030002594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1,1,0,0.01894879937171936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1,2,0,0.018769599497318268
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1,64,0,0.013055999577045441
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.013473600149154663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.012667199969291687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.02282080054283142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.020790399610996248
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.018713599443435668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.02075839936733246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.014638400077819825
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.019012799859046935
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.014628799259662628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.01876160055398941
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.018807999789714813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.013670399785041809
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.014523200690746307
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.016553600132465363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.020827199518680572
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.015324799716472626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.020824000239372253
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.020735999941825865
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.02065120041370392
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.01876640021800995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.024784000217914583
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.014547200500965118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.022819200158119203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.022806400060653688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.02284960001707077
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.02280319929122925
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.02080480009317398
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.016070400178432465
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.015936000645160674
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.01659359931945801
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.026428800821304322
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.014636799693107605
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.014720000326633453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.025727999210357667
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.021108800172805788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.01865759938955307
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.026366400718688964
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.023004800081253052
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.018673600256443025
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.03351680040359497
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.024860799312591553
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.026795199513435362
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.02699199914932251
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.03503519892692566
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.025009599328041077
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.02309280037879944
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.026888000965118408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.028918400406837463
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.026574400067329407
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.022737599909305573
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.047363200783729555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.020777599513530733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.037057599425315856
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.030899199843406677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.028939199447631837
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.06797760128974914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.031004801392555237
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.03508000075817108
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.030943998694419862
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.02898559868335724
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.03300159871578216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.04235999882221222
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.029228800535202028
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.08064320087432861
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.028880000114440918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.05342239737510681
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.037064000964164734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.037124800682067874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.10723520517349243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.06732959747314453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.046387198567390445
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.039212799072265624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.04328320026397705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.041105601191520694
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.03701759874820709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.1218000054359436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.03712159991264343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.07615039944648742
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03713279962539673
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.044715198874473575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.05383359789848328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.04334399998188019
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.04162879884243011
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.04951840043067932
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.09128159880638123
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.1508687973022461
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.05137119889259338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.05741440057754517
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.04530239999294281
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.04325920045375824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.10201439857482911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.045342400670051575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.06548159718513488
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.05734879970550537
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.05178400278091431
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.049716800451278687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.16699999570846558
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.253985595703125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06967840194702149
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.0948736011981964
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.0620576024055481
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.15012480020523072
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.04933759868144989
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05964319705963135
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.27148799896240233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.15946240425109864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.10328799486160278
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.06373760104179382
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.07508959770202636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.05758399963378906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.06786400079727173
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.38732960224151614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.2187887907028198
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.08727520108222961
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.1360640048980713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.06569120287895203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.0790063977241516
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.07389600276947021
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.22741279602050782
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.07601280212402343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.1426975965499878
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.07810080051422119
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.39542720317840574
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.07799839973449707
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.09115039706230163
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.7401999950408935
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.0824895977973938
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.2312544107437134
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.1492176055908203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.10536799430847169
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.39877119064331057
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.7012288093566894
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.1026352047920227
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.11229599714279175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.23231680393218995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.152292799949646
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.11451679468154907
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.3914639949798584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.1046447992324829
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.35532479286193847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.10723520517349243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.6363455772399902
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.2217855930328369
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,1.2081919670104981
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.1462399959564209
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.12974879741668702
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.1375200033187866
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,1.0854351997375489
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.22154560089111328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.13705120086669922
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.12946399450302123
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.34419679641723633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.14540799856185913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.5967152118682861
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.19337600469589233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.3058079957962036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.513483190536499
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.9278800010681152
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.166703999042511
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.7825519561767578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.16209759712219238
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.8347840309143066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.29587359428405763
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.47491841316223143
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.18892639875411987
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.5450016021728517
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.1586032032966614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.1646016001701355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.2799663543701172
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.25579679012298584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.39703199863433836
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,2.494363212585449
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.693668794631958
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.1897264003753662
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.19855200052261351
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.6262159824371338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.3737071990966797
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,1.1135040283203126
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.19327520132064818
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.24816160202026366
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,2.088159942626953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.18484799861907958
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,1.1344816207885742
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.640012788772583
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.39555680751800537
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,2.136112022399902
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.2595423936843872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,4.33197135925293
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2504271984100342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,0.9831952095031739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,3.506345748901367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.5757855892181396
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,1.7845455169677735
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.3711888074874878
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.2506239891052246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32768,16,0,1.1882911682128907
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32768,32,0,0.7262432098388671
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32768,8,0,2.1588464736938477
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.23986239433288575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,32768,64,0,0.4951375961303711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32768,2,0,8.379032135009766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32768,4,0,4.23046875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32768,16,0,1.022771167755127
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32768,8,0,1.7386783599853515
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32768,4,0,3.293099212646484
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32768,2,0,6.471160125732422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1,1,0,0.01658879965543747
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,32768,64,0,0.4537807941436768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1,2,0,0.014620800316333771
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32768,1,0,16.8219970703125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1,4,0,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1,16,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32768,1,0,12.926019287109375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32768,32,0,0.6658783912658691
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1,32,0,0.012600000202655792
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1,64,0,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1,2,0,0.02093279957771301
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1,4,0,0.020747199654579163
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1,8,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1,64,0,0.020772799849510193
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1,32,0,0.020878399908542632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1,8,0,0.02064799964427948
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1,1,0,0.02267040014266968
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1,16,0,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.014558400213718414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.01465280055999756
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.014678399264812469
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.024718399345874786
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.02088800072669983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.021080000698566435
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.022731199860572815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.016513599455356597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.02276639938354492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.022071999311447144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.02496960014104843
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.02282080054283142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.02168000042438507
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.01674720048904419
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.021358400583267212
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.01860480010509491
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.01316000074148178
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.02682720124721527
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.02282399982213974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.02282080054283142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.022711999714374542
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.022742399573326112
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.018670399487018586
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.022731199860572815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.01736160069704056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.03300159871578216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.024798400700092316
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.022814400494098663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.024868799746036528
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.026759999990463256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.024771200120449068
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.022729599475860597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.022873599827289582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.020694400370121
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.026982399821281432
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.018587200343608855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.02693600058555603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.02484000027179718
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.03508960008621216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.030987200140953065
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.04334560036659241
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.05804640054702759
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.02890079915523529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.036513599753379825
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.024775999784469604
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.022699199616909027
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.045311999320983884
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.03916960060596466
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.02282560020685196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.1162335991859436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.0717311978340149
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.03097119927406311
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.029193601012229918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.028836798667907716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.033030399680137636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.07003200054168701
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.037723198533058167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.030934399366378783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.03097440004348755
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.05345600247383118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.1380687952041626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.043243199586868286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.04565280079841614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.03298400044441223
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.08402720093727112
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.03920640051364899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.03711200058460236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.0390639990568161
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.18711520433425904
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.06812480092048645
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.10984959602355956
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.03922719955444336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.04112800061702728
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.21379199028015136
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.04880799949169159
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.04328800141811371
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.0789135992527008
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.12486399412155151
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.05041279792785645
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.04915359914302826
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.05761439800262451
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.047270399332046506
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.09494879841804504
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.060303997993469236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.05357760190963745
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.15259360074996947
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.04930239915847778
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.2683072090148926
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.2973455905914307
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.1708783984184265
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.04737440049648285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.06843680143356323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.10561120510101318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.0554639995098114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.26031520366668703
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.06026080250740051
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.4755280017852783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.053547197580337526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.09822400212287903
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.15656640529632568
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.07236800193786622
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.06619679927825928
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.06394720077514648
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.27756319046020506
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.10858399868011474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.16557120084762572
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.06983680129051209
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.08025280237197877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.07395520210266113
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.7339072227478027
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.3936271905899048
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.4968224048614502
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.1428048014640808
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.08506399989128113
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.09215840101242065
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.08016160130500793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.22440640926361083
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.40220160484313966
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.7339439868927002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.15024160146713256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.09862080216407776
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.08627200126647949
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.23404479026794434
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.09048799872398376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.744865608215332
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.4098800182342529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.23856000900268554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.4349200248718261
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.1569056034088135
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.12146719694137573
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.7126463890075684
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.3283856391906739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.4028783798217773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.24173119068145751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.11336480379104615
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.12520159482955934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.11705440282821655
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.16106239557266236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.36997280120849607
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.6533552169799804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.23391358852386473
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,2.3663152694702148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.1574288010597229
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.14859999418258668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.20654239654541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,1.1000896453857423
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.36032159328460694
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.15003039836883544
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,2.0730239868164064
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.16062400341033936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.2370703935623169
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.6100895881652832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,3.632891082763672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.5227968215942382
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,0.9490768432617187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.786676788330078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.18157919645309448
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.20382080078125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.3207087993621826
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,0.8560432434082031
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.31477439403533936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,2.9950096130371096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.2054095983505249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.49576478004455565
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.5657232284545899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.18160959482192993
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.2962608337402344
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,2.4526880264282225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.41434078216552733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,5.190580749511719
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.7142079830169678
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.21475520133972167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.2734623908996582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,4.2366382598876955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.6491055965423584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,2.1120864868164064
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,1.1370479583740234
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.26856160163879395
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.21463680267333984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.3961695909500122
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.6605887889862061
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,2.1428159713745116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.41901440620422364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,1.1621359825134276
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.2867647886276245
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,4.387083053588867
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.6059679985046387
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,1.0134927749633789
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,8.855899047851562
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,1.8165088653564454
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,3.539263916015625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.4020832061767578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.2781264066696167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,7.110219573974609
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32768,32,0,1.2286720275878906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32768,16,0,2.1631343841552733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32768,8,0,4.285276794433594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,32768,64,0,0.7762464046478271
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32768,4,0,8.472711944580078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32768,16,0,1.795849609375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32768,8,0,3.339823913574219
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32768,2,0,16.976542663574218
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32768,4,0,6.4372306823730465
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,32768,64,0,0.7174255847930908
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1,1,0,0.02139039933681488
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32768,32,0,1.0838496208190918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32768,2,0,12.938606262207031
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1,4,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1,8,0,0.014521600306034088
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1,16,0,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1,2,0,0.016524800658226015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1,64,0,0.013275200128555298
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1,2,0,0.024031999707221984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1,1,0,0.024928000569343568
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1,4,0,0.022815999388694764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1,32,0,0.01266079992055893
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1,8,0,0.02152319997549057
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1,16,0,0.02066880017518997
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1,32,0,0.02077919989824295
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1,64,0,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32768,1,0,34.15047607421875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.019582399725914003
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32768,1,0,26.306826782226562
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.016616000235080718
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.014854399859905243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.014620800316333771
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.02685439884662628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.02269600033760071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.02231519967317581
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.022835199534893037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.022697600722312927
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.02078080028295517
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.02276480048894882
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.014929600059986115
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.014534400403499603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.024780799448490144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.022697600722312927
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.020720000565052032
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.02513279914855957
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.020747199654579163
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.015015999972820281
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.035124799609184264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.02820959985256195
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.014550399780273438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.02479359954595566
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.022694399952888487
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.02077440023422241
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.024774399399757386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.020777599513530733
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.0327919989824295
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.016655999422073364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.01863040030002594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.044537600874900815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.03307999968528748
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.024822400510311128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.02487040013074875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.03306719958782196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.051507198810577394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.024825599789619446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.02552480101585388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.024827200174331664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.018727999925613404
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.06531040072441101
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.04333440065383911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.022734400629997254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.018985599279403687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.035099199414253233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.026923200488090514
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.0267984002828598
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.09945759773254395
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.030910399556159974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.05783360004425049
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.026923200488090514
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.022788800299167633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.03683840036392212
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.12328159809112549
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.07477759718894958
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.03518239855766296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.03297759890556336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.03095200061798096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.1187440037727356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.04941120147705078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.039136001467704774
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.20752480030059814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.04111840128898621
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.045879998803138734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.036051198840141296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.07223520278930665
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.0350816011428833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.24788320064544678
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.08628640174865723
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.057601600885391235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.1414191961288452
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.045270401239395144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.04941120147705078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.3427999973297119
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.04327360093593598
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.07243679761886597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.190065598487854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.11309599876403809
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.04731999933719635
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.045307201147079465
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.053467202186584475
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.39000959396362306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.12964160442352296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.06192799806594849
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.22015199661254883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.055364799499511716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.08424479961395263
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.05147839784622192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.2734143972396851
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.5068319797515869
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.16155680418014526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.09970080256462097
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.059305602312088014
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.5521440029144287
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.053446400165557864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.06444159746170045
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.3048144102096558
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.11145279407501221
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.07614399790763855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.067220801115036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.17732640504837036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.06367040276527405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.9177951812744141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.1632688045501709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.4832736015319824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.26811680793762205
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.08222560286521911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.0753711998462677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,0.9379839897155762
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.10889600515365601
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.505620813369751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.28892319202423095
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.11912480592727662
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.09105920195579528
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.17512960433959962
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.0822928011417389
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.15561439990997314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.10284639596939087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.2389296054840088
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.4467439651489258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.40677919387817385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.748206377029419
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.09655839800834656
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.402571201324463
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.7484320163726806
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.4174448013305664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.2500319957733154
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.164465594291687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.11278879642486572
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.10473439693450928
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.454075241088867
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.4250944137573242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.8437616348266603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.7624527931213378
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.26017279624938966
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.17463359832763672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.1357103943824768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.2654880046844482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.7346528053283692
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.1854192018508911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.14758720397949218
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.3457887649536133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.42614240646362306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.589628791809082
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.39606080055236814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.2317551612854003
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.6696544170379639
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,2.376835250854492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,4.903252792358399
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.2566368103027344
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.17935839891433716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.6411952018737793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,1.1285759925842285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,2.104319953918457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.39076159000396726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.18810399770736694
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,4.164406585693359
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.5557328224182129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.2636672019958496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.8060079574584962
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,0.9792592048645019
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,3.6388240814208985
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.35097761154174806
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,7.485752105712891
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.603219223022461
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.2322240114212036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.8913455963134765
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,3.0750303268432617
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.5323696136474609
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.3503551959991455
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,6.148678588867187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.2424623966217041
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.7488080024719238
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.5572847366333007
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.4524543762207031
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.3206735610961915
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.307590389251709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,5.174604797363282
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.6932112216949463
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,4.3114768981933596
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,2.1503999710083006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,10.411682891845704
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,1.1830096244812012
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.4400767803192139
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,8.515342712402344
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.3139616012573242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.7103871822357177
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,1.2010080337524414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.4652719974517822
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,4.412886428833008
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,2.214182472229004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,9.092475128173827
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,1.8752176284790039
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,1.0723600387573242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,3.586337661743164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.6657311916351318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,7.242011260986328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,18.180474853515626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.45560479164123535
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1,1,0,0.02534080147743225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1,2,0,0.020979200303554536
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1,8,0,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,14.424069213867188
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1,4,0,0.015780800580978395
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1,1,0,0.030988800525665283
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1,64,0,0.013574400544166565
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1,2,0,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1,16,0,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1,4,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1,32,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1,32,0,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1,64,0,0.022742399573326112
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1,16,0,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.026902401447296144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1,8,0,0.020843200385570526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.02062080055475235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.014628799259662628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.014779199659824372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.035102400183677676
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.028836798667907716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.02279839962720871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.03091840147972107
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.022759999334812164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.020670400559902193
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.02279520034790039
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.041300800442695615
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.030924800038337707
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.024881599843502043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.01586560010910034
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.022779199481010436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.02279520034790039
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.03914400041103363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.022811199724674224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.02481279969215393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.02271520048379898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.018639999628067016
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.04954560101032257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.02282720059156418
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.03508639931678772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.026793599128723145
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.026840001344680786
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.022703999280929567
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.05252640247344971
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.025251200795173644
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.03307200074195862
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.021240000426769257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.016572800278663636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.06813600063323974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.045300799608230594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.02677919864654541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.024777600169181825
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.03301919996738434
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.0913424015045166
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.05142719745635986
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.024799999594688416
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.034939199686050415
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.02457599937915802
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.022737599909305573
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.02683520019054413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.116211199760437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.06782879829406738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.031014400720596313
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.020795199275016784
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.029193601012229918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.03718400001525879
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.04528799951076508
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.028923198580741882
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.179476797580719
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.10304000377655029
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.06122879981994629
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.028974398970603943
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.03440159857273102
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.2210383892059326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.12662880420684813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.07705439925193787
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.04007999897003174
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.05140479803085327
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.0433135986328125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.039124798774719236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.21419999599456788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.037062400579452516
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.3919663906097412
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.05144320130348205
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.12480319738388061
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07686560153961182
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.041176000237464906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.04529759883880615
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.45937280654907225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.25443038940429685
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.14784159660339355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.09355999827384949
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.0642624020576477
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.05034559965133667
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.3508671998977661
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.6586863994598389
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.05592160224914551
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.08210880160331727
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.12115999460220336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.05582879781723023
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.06097279787063599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.7394591808319092
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.19864319562911986
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.4015984058380127
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.13940800428390504
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.09463040232658386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.22924480438232422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.06576640009880066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.07346879839897155
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.9910719871520997
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.17051520347595214
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.2870448112487793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.10988320112228393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.07032639980316162
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.5175936222076416
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,1.060753631591797
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.07665600180625916
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.5667759895324707
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.318886399269104
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.19130879640579224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.09050880074501037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.1286080002784729
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.5000207901000977
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.28629279136657715
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.9337759971618652
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.0821120023727417
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.12670719623565674
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.09861440062522889
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.8016447067260741
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.1807647943496704
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.8240768432617187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.9571711540222168
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.140447998046875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.5274928092956543
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.19729759693145751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.30994720458984376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.4287888050079346
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.17774720191955568
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.25967519283294677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.11068320274353027
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.7701807975769043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.4651215553283692
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.9133920669555664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.27984158992767333
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.775812816619873
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.4347423553466796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.12710560560226442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.4466288089752197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.7401615142822267
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.19393279552459716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.14162399768829345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.4603280067443848
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.4760095596313476
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,2.9269344329833986
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.8020256042480469
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.29257760047912595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,5.965241622924805
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.21200320720672608
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.393841552734375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.468555212020874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.626456069946289
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,5.15973129272461
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.3074879884719849
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.7752223968505859
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.22795519828796387
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.2753408432006836
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.30593440532684324
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.7206992149353028
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,2.438478469848633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.44121761322021485
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,4.923628616333008
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,10.07043685913086
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,1.1883472442626952
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,2.1686223983764648
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.6980559825897217
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,4.289246368408203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.4462768077850342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1,2,0,0.02462400048971176
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.321776008605957
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1,1,0,0.03376800119876862
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1,8,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,8.427820587158203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1,64,0,0.014798399806022645
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1,16,0,0.015479999780654907
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1,4,0,0.019009600579738616
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1,2,0,0.03101919889450073
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1,4,0,0.025537601113319396
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1,1,0,0.04119040071964264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1,16,0,0.021036800742149354
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1,32,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1,8,0,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.027305600047111512
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.041203200817108154
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.020759999752044678
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1,32,0,0.020713600516319274
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1,64,0,0.020776000618934632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.01536799967288971
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.04945760071277618
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.01446239948272705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.016524800658226015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.035067200660705566
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.02890399992465973
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.024358400702476503
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.022703999280929567
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.022843199968338012
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.03300639986991882
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.04814240038394928
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.015688000619411467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.041198399662971494
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.05761920213699341
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.03100000023841858
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.022832000255584718
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.02484800070524216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.061740797758102414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.039166399836540224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.022809599339962006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.01677280068397522
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.07808480262756348
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.01652960032224655
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.0498879998922348
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.037134400010108946
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.02682879865169525
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.026603201031684877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.024795199930667877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.09483199715614318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.03510560095310211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.055420798063278195
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.02284640073776245
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.020739200711250304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.11959359645843506
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.06988800168037415
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.045640000700950624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.03510079979896545
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.030888000130653383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.026918399333953857
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.16424640417098998
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.09469760060310364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.05548480153083801
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.026825600862503053
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.03708640038967133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.02486239969730377
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.024849599599838255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.07149919867515564
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.20718719959259033
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.02895680069923401
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.12006720304489135
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.049327999353408813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.03364959955215454
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.3345632076263428
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.18465280532836914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.04535839855670929
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.10705599784851075
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.03507040143013
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.03917439877986908
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.035094401240348815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.22964320182800294
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.41078720092773435
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.13213280439376832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.04941920042037964
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.04526079893112182
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.08446879982948304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.06673280000686646
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.40392160415649414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.7646736145019531
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.059592002630233766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.2267983913421631
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.09077439904212951
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.13751200437545777
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.06328960061073304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.4733920097351074
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.8823984146118165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.05584480166435242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.10913439989089965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.16376800537109376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.07795360088348388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.06992639899253845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.26865758895874026
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.678707218170166
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.21723039150238038
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.3076111793518066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.09852960109710693
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.13955039978027345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.07809919714927674
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.3684528112411499
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.16231839656829833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.4220399856567383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.7597695827484131
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.09274079799652099
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.1172287940979004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.4383935928344727
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.2520960092544556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.1934592008590698
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.5402671813964843
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.30688478946685793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,1.009596824645996
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.13473600149154663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.956235122680664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09924799799919129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,2.073753547668457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.3466048002243042
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5956111907958984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,1.0859408378601074
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.15671999454498292
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.21976959705352783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.11934080123901367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.1590656042098999
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.3198591947555542
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.21425280570983887
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.5340288162231446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.9695839881896973
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.8498048782348633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,3.6456977844238283
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.2407680034637451
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,1.000278377532959
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.35316479206085205
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.5721680164337158
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.8629327774047852
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.18371200561523438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.580081558227539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.8169648170471191
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.47187042236328125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.5027503967285156
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.30644640922546384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.22557919025421141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,2.9706655502319337
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.33723039627075196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,6.0088542938232425
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.4910431861877442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.5034671783447265
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.8291728019714355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.24995040893554688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.802907180786133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,5.511884689331055
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1,4,0,0.02475679963827133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1,1,0,0.07460960149765014
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1,8,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1,16,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1,32,0,0.014616000652313232
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1,2,0,0.035123199224472046
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1,1,0,0.06171839833259583
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1,2,0,0.041140800714492796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1,4,0,0.03295519948005676
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1,8,0,0.026903998851776124
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1,16,0,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1,64,0,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.0690447986125946
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.028678399324417115
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1,32,0,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.04234879910945892
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.020287999510765077
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1,64,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.01626240015029907
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.02690559923648834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.0776848018169403
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.03499360084533691
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.024833600223064422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.04954079985618591
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.022745600342750548
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.02282399982213974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.04817599952220917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.032953599095344545
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.01863359957933426
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.022711999714374542
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.08205119967460632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.016700799763202667
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.05951679944992065
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.04115360081195831
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.029174399375915528
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.026848000288009644
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.09490399956703185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.024851199984550477
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.06319040060043335
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.04121440052986145
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.0270687997341156
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.11358239650726318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.01666560024023056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.13880959749221802
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.08021280169487
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.01870719939470291
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.05351999998092651
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.037459200620651244
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.028841599822044373
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.024823999404907225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.02895680069923401
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.09677600264549255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.17248320579528809
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.03703359961509704
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.02475679963827133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.026843199133872987
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.05762240290641785
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.0210207998752594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.1231279969215393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.07406880259513855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.2187903881072998
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.04944800138473511
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.03706560134887695
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.03344480097293854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.3074336051940918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.17032159566879274
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.09981279969215393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.030972799658775328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.041201600432395936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.06097279787063599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.03505280017852783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.21262080669403077
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.386297607421875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.12603679895401002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.05435839891433716
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.07818080186843872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.04735200107097626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.04124639928340912
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.648415994644165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.34680159091949464
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.08024640083312988
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.11840959787368774
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.19719359874725342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.04946880042552948
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.056032001972198486
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.24263999462127686
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.425710391998291
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.7919727802276612
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.14684159755706788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.09950240254402161
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.0637440025806427
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.073716801404953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.7855023860931396
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.42628002166748047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.2490463972091675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.5028176307678223
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.1582335948944092
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.11396160125732421
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.5013631820678711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.08511199951171874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.296014404296875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.7299951553344726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.1376911997795105
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.19115840196609496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.10909759998321533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.9096768379211426
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.710814380645752
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.24968159198760986
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.6111215591430663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.4018095970153809
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.1325584053993225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.340775966644287
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.17457760572433473
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.477505588531494
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.8038448333740235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.20565121173858641
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.2941279888153076
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.1599984049797058
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.4630335807800293
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.8270959854125977
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5826623916625977
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,1.052409553527832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.3537600040435791
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.23946080207824708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.9540145874023436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,2.0084943771362305
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.18078399896621705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.6517375946044922
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.4042960166931152
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,1.1436688423156738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1,1,0,0.09888799786567688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,2.1311296463012694
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.2120431900024414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,4.099801635742187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.27873759269714354
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1,2,0,0.055636799335479735
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1,4,0,0.03497920036315918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1,16,0,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1,8,0,0.024732799828052522
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1,64,0,0.01454399973154068
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1,32,0,0.016620799899101257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1,16,0,0.0267984002828598
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1,8,0,0.0330159991979599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1,2,0,0.06317120194435119
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1,1,0,0.10357760190963745
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1,64,0,0.022761599719524385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1,32,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1,4,0,0.041231998801231386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.12336959838867187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.068995201587677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.042316800355911253
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.017092800140380858
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.028859201073646545
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.021161599457263945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.04963679909706116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.0350735992193222
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.07809919714927674
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.02884480059146881
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.022859199345111846
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.024875199794769286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.13544960021972657
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.08433279991149903
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.033046400547027587
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.04949280023574829
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.15294239521026612
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.01897120028734207
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.017960000038146972
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.09728320240974427
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.17471840381622314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.043252798914909366
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.03301439881324768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.024801599979400634
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.21289439201354982
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.02688319981098175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.060310399532318114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.11715999841690064
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.04322879910469055
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.030883198976516722
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.02282399982213974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.06744160056114197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.08218560218811036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.25789918899536135
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.056358402967453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.03306719958782196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.1424831986427307
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.04121760129928589
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.030983999371528625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.32563199996948244
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.17662400007247925
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.06345279812812805
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.043263998627662656
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.0329120010137558
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.02889760136604309
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.10097600221633911
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.22576959133148194
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.4113664150238037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.12768479585647582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.08164799809455872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.05553280115127564
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.04472799897193909
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.04121600091457367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.5925055980682373
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.3184639930725098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.11098719835281372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.07317600250244141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.053497600555419925
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.047336000204086306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.18146560192108155
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.09402080178260804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.228004789352417
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.40147838592529295
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.06984320282936096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.061694401502609256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.7443552017211914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.14189280271530152
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.2826080322265625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.36752479076385497
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.14072959423065184
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.10160800218582153
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.6709504127502441
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.21744959354400634
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.07804800271987915
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.4543039798736572
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.27083520889282225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.17644959688186646
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.1294111967086792
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.10264639854431153
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.8216480255126953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.5511183738708496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.8302672386169434
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,3.050889587402344
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.46956801414489746
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.5498671531677246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.29197919368743896
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.20483200550079345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.15811840295791627
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.5582320213317871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.24754080772399903
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.786235237121582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.19285600185394286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,3.4175441741943358
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.9657072067260742
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1,1,0,0.18135679960250856
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.3521680116653442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1,8,0,0.035067200660705566
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1,16,0,0.024740800261497498
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1,4,0,0.057043200731277464
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1,2,0,0.09998400211334228
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,1,64,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1,1,0,0.18464159965515137
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1,32,0,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1,4,0,0.06222079992294312
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1,8,0,0.041228801012039185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1,16,0,0.033036801218986514
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1,32,0,0.02691679894924164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,1,64,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1,2,0,0.10409599542617798
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.23235840797424318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.06981279850006103
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.12297439575195312
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.04319039881229401
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.01866080015897751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.028935998678207397
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.02237280011177063
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.08077120184898376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.25219199657440183
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.13787200450897216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.05354239940643311
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.02683840095996857
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.031001600623130798
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.2918720006942749
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.1566848039627075
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.037118399143218996
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.05168799757957458
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.02685439884662628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.020747199654579163
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.3229615926742554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.17786879539489747
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.08679360151290894
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.10052800178527832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.06320639848709106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.03508000075817108
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.030904000997543334
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.40520319938659666
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.04699040055274963
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.21836159229278565
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.12355040311813355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.047777599096298216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.07209280133247375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.03707039952278137
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.028951999545097352
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.48738560676574705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.26556479930877686
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.0915232002735138
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.047383999824523924
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.06165760159492493
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.03922719955444336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.14822080135345458
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.6333695888519287
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.336190390586853
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.18858879804611206
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.1133679986000061
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.07519040107727051
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.04532159864902496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.053491199016571046
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.42421278953552244
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.24152801036834717
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.14214880466461183
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.7922287940979004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.09724000096321106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.07191359996795654
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.1660287857055665
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.05932639837265015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3414992094039917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.20339839458465575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.0944927990436554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.6142608165740967
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07406079769134521
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.1332800030708313
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.4603535652160644
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.09824479818344116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.1681488037109375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.428604793548584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.12273759841918945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.7723455905914307
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.2570143938064575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.7166399955749512
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.41155362129211426
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.3274944305419922
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.5480224609375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.2624847888946533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.1848207950592041
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.1454848051071167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.23351199626922609
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.5097487926483154
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.6094367980957032
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.8775296211242676
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.1849552035331726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.3271087884902954
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,3.06945915222168
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,1,2,0,0.18636000156402588
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,1,4,0,0.09964320063591003
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,1,1,0,0.347321605682373
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,1,16,0,0.03500159978866577
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,1,8,0,0.05658400058746338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,1,32,0,0.02492160052061081
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,1,64,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,1,2,0,0.1870144009590149
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,1,4,0,0.10447520017623901
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,1,16,0,0.04113439917564392
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,1,64,0,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,1,32,0,0.03301759958267212
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.4441487789154053
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,1,1,0,0.3510335922241211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.2336496114730835
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,1,8,0,0.06395519971847534
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.07320799827575683
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.12753920555114745
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.04615040123462677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.033000001311302186
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.024747200310230255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.2552351951599121
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.08350399732589722
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.03308480083942413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.04323360025882721
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.5591184139251709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.48076481819152833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.055776000022888184
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.1395632028579712
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.29511198997497556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.16242719888687135
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.09302880167961121
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05773760080337524
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.04307680130004883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.6184912204742432
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.18475680351257323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.10939840078353882
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.033092799782752993
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.32970080375671384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.07047520279884338
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.05345439910888672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.043196800351142886
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.228438401222229
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.41672801971435547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.05971840023994446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.04734080135822296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.08531360030174255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.9459072113037109
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.1349503993988037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7955376148223877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.5010208129882813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.27877440452575686
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.16246880292892457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.10488159656524658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.07601760029792785
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.653872013092041
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.3583008050918579
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.21087999343872071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.061766397953033444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.13585760593414306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09794399738311768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07595040202140808
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.2456255912780763
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.8224096298217773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.45339360237121584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.5613776206970216
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.10054880380630493
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.12605600357055663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.26837759017944335
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.17292000055313111
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.2057744026184083
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.659822416305542
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.24592320919036864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.383622407913208
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.17650879621505738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,2.3170400619506837
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.13962559700012206
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.4835631847381592
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.22633759975433348
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.5165311813354492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.828228759765625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.31345601081848146
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.881283187866211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1,1,0,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1,2,0,0.01433439999818802
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.17952959537506102
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1,4,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1,8,0,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1,16,0,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1,1,0,0.022868800163269042
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1,2,0,0.021675199270248413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1,8,0,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1,64,0,0.012708799540996551
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1,16,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1,32,0,0.020740799605846405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1,32,0,0.013891200721263885
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1,4,0,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1,64,0,0.020761600136756896
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.014660799503326416
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.012571200728416443
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.022745600342750548
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.022729599475860597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.02070080041885376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.020839999616146087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.016017599403858183
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.014547200500965118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.024902400374412537
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.014606399834156037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.020678399503231047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.022780799865722658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.02075680047273636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.01873600035905838
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.02266079932451248
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.026931199431419372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.022779199481010436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.022776000201702118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.022779199481010436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.020827199518680572
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.02446240037679672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.02454880028963089
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.016865600645542145
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.014643199741840363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.01465120017528534
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.03097760081291199
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.024825599789619446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.026836800575256347
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.0319023996591568
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.02476159930229187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.024780799448490144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.02492319941520691
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.021273599565029146
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.01878879964351654
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.018648000061511995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.017561599612236023
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.043278399109840396
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.031007999181747438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.026892799139022826
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.026873600482940675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.026924800872802735
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.0347135990858078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.035078400373458864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.026771199703216553
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.057678401470184326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.028908801078796387
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.024711999297142028
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.022806400060653688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.07216479778289794
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.024843199551105498
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.045745599269866946
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.037084800004959104
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.031040000915527343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.030985599756240843
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.02901279926300049
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.028905600309371948
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.03267520070075989
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.04142560064792633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.06817280054092408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.028988799452781676
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.11514400243759156
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.13632800579071044
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.02890239953994751
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.03507040143013
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.08227840065956116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.05190399885177612
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.03914400041103363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.04329920113086701
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.03508319854736328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.18738240003585815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.037064000964164734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.06609439849853516
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.041275200247764585
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.03716799914836884
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.03906719982624054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.10699520111083985
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.21101760864257812
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.04727199971675873
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.07612959742546081
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.055262398719787595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.049644801020622256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.12137919664382935
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.2667344093322754
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.0912223994731903
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.04323680102825165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.05759199857711792
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.04938560128211975
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.0472896009683609
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.04389120042324066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.15124959945678712
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.2955456018447876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.1675104022026062
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.06497600078582763
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05761920213699341
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.1027184009552002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.051363199949264526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.05347679853439331
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.1496832013130188
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.06804959774017334
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.09484320282936096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.2561743974685669
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.47055997848510744
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.05961920022964477
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.06376320123672485
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.2728512048721313
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.16015520095825195
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.488753604888916
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.10374239683151246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.07447360157966613
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.0657472014427185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.2200079917907715
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.3870464086532593
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.0867792010307312
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.06809279918670655
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.13704960346221923
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.7288623809814453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.07400799989700317
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.0802191972732544
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.7269023895263672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.22602880001068115
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.3959775924682617
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.09175840020179749
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.1415328025817871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.08420479893684388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.07814720273017883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.4005760192871094
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.7423664093017578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.4245823860168456
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.23071041107177734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.14891040325164795
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.10485600233078003
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.11144640445709228
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.3915760040283203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.3164223670959472
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.7024911880493164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.11485439538955688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.2320847988128662
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.15228960514068604
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.10678720474243164
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.22234559059143066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.14408799409866332
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.3578768014907837
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.13661439418792726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,1.2092448234558106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.634503984451294
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,2.369206428527832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.34662880897521975
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.5953855991363526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,1.0839776039123534
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.21965439319610597
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.1352303981781006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,2.0748064041137697
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.14646400213241578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.9344767570495606
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.30540320873260496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.5116640090942383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.761636734008789
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.19447200298309325
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.16699999570846558
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,3.0388864517211913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,3.635529708862305
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,0.8329216003417969
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.475
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.5464367866516113
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.2973167896270752
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.18949919939041138
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.16340160369873047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.255459189414978
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.6923871994018554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.1990447998046875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.39818880558013914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.2772175788879394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,2.535416030883789
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,5.165744018554688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.6288847923278809
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.24809279441833496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,2.0859888076782225
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.37535359859466555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,1.1138591766357422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,4.156806564331054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.6400415897369385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.19351199865341187
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.39355518817901614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,2.1897647857666014
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,1.1342047691345214
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,4.381302261352539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.25921120643615725
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,1.78853759765625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,0.9817808151245118
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,3.459563064575195
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,9.008968353271484
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.3735408067703247
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.5769231796264649
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,7.041146850585937
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.2506144046783447
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32768,16,0,2.1191776275634764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32768,32,0,1.1874192237854004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32768,64,0,0.7279952049255372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32768,8,0,4.236406326293945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32768,4,0,8.49896469116211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32768,8,0,3.26318244934082
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32768,16,0,1.7382848739624024
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32768,2,0,17.043606567382813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32768,4,0,6.49066390991211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32768,32,0,1.0181952476501466
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32768,64,0,0.6642464160919189
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1,1,0,0.021507200598716737
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1,16,0,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32768,2,0,12.76296157836914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1,8,0,0.013715200126171112
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1,2,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1,32,0,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1,4,0,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1,64,0,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1,1,0,0.026919999718666078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1,2,0,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1,32,0,0.020740799605846405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1,8,0,0.020742399990558623
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,32768,1,0,26.287420654296874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1,4,0,0.020785599946975708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1,64,0,0.018857599794864656
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1,16,0,0.020718400180339814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,32768,1,0,34.20438232421875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.02062560021877289
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.014667199552059173
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.028896000981330872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.02282399982213974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.020764799416065217
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.021643200516700746
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.01722400039434433
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.02282080054283142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.025089600682258607
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.022732800245285033
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.02943840026855469
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.02279040068387985
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.02098720073699951
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.022836799919605254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.022779199481010436
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.026873600482940675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.018668800592422485
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.022864000499248506
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.026873600482940675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.035104000568389894
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.022702400386333466
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.022761599719524385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.033025598526000975
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.015414400398731232
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018702399730682374
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.03285279870033264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.02887200117111206
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.0432671993970871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.024843199551105498
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.04952960014343262
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.02276639938354492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.031196799874305726
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.024833600223064422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.01919520050287247
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.018774400651454925
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.018615999817848207
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.06573920249938965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.03506560027599335
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.04327679872512817
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.03096800148487091
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.026919999718666078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.026926401257514953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.05784000158309936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.02685759961605072
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.035087999701499936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.09907199740409851
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.030604800581932066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.1213919997215271
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.07144160270690918
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.04734399914741516
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.0247296005487442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.03310079872608185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.031043198704719544
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.20591681003570556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.030737599730491637
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.03908959925174713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.07015839815139771
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.037745600938796996
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.11530560255050659
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.030859199166297913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.044116801023483275
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.24378080368041993
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03298879861831665
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.08216959834098816
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.13797440528869628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.054830402135849
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.04115839898586273
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.3388256072998047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.10849440097808838
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.18551360368728637
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.03908959925174713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.04531520009040833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.06817600131034851
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.04932000041007996
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.04328640103340149
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.04121600091457367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.3840687990188599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.07894560098648071
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.05752800107002258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.2139375925064087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.12486239671707153
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.04734559953212738
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.5014063835144043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.049718400835990904
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.15576319694519042
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.26723361015319824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.0947055995464325
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.04880799949169159
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.059880000352859494
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.5454800128936768
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.05355039834976196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.2977504014968872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.17057600021362304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.06826559901237488
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.10586400032043457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.06130239963531494
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.05549439787864685
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.9124640464782715
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.09860640168190002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.26037120819091797
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.15670080184936525
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.475816011428833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.06784319877624512
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.07260320186614991
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.9268256187438965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.49553279876708983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.27761120796203614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.07263200283050537
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.16603360176086426
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.10851839780807496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.07909119725227357
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.4232064247131349
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.3945807933807373
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.22648799419403076
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.7370128154754638
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.14244480133056642
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.08424000144004821
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.09096959829330445
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.4036911964416504
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.387923240661621
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.23278720378875734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.7329264163970948
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.09895520210266114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.09045439958572388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.14809919595718385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.4351872444152831
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.9173023223876955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.1583359956741333
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.12131199836730958
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.23928799629211425
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.751691198348999
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.4073552131652832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.40388641357421873
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.24407520294189453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.3262384414672852
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.12527040243148804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.7137119770050049
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.5690080642700197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.16255359649658202
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.202999973297119
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,2.358500862121582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.37137761116027834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.6537360191345215
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.2341968059539795
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,4.807494354248047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.15690879821777343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.36025118827819824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.608843183517456
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,2.07324161529541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,1.1001343727111816
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.23721919059753419
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,4.162564849853515
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,1.7941648483276367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.3202159881591797
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.5248735904693603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,3.636897659301758
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.1594256043434143
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.20421440601348878
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.939459228515625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,7.4640144348144535
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.5647616386413574
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,0.8567008018493653
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,3.0110511779785156
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.3135839939117432
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.49527678489685056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,6.142108917236328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.20814080238342286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.709988784790039
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.5360111236572265
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.3038448333740233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,5.169094467163086
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.2757983922958374
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.418668794631958
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,1.1394816398620606
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,2.1076608657836915
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,4.240131378173828
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,10.457335662841796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.650216007232666
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.3974735975265503
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.26772959232330323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,8.543105316162109
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,0.6571248054504395
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,1.1551615715026855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.417468786239624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,2.181193542480469
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,4.455110549926758
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,8.951337432861328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,1.8149616241455078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,1.0112208366394042
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,3.5075489044189454
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.6063263893127442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,7.132137298583984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.401364803314209
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,18.165382385253906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,14.397433471679687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32768,32,0,2.1931631088256838
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32768,64,0,1.2453311920166015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32768,16,0,4.184672164916992
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32768,8,0,8.566429138183594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32768,4,0,16.861988830566407
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32768,16,0,3.328327941894531
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32768,8,0,6.391366577148437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32768,64,0,1.0856639862060546
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32768,32,0,1.7969120025634766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32768,4,0,12.981283569335938
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1,1,0,0.02683520019054413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1,4,0,0.016523200273513793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1,2,0,0.020665599405765532
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1,16,0,0.01324159950017929
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1,8,0,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1,32,0,0.013953599333763122
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1,64,0,0.01342879980802536
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1,1,0,0.03107680082321167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32768,2,0,34.184872436523435
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1,2,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32768,2,0,26.268722534179688
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1,4,0,0.022862400114536285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1,8,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1,16,0,0.023099200427532197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1,64,0,0.020689600706100465
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1,32,0,0.02282239943742752
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.026848000288009644
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.02839039862155914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.022835199534893037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.034964799880981445
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.020737600326538087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.02282879948616028
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.02274080067873001
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.02075680047273636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.031795200705528257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.014558400213718414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.020742399990558623
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.014467200636863709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.014619199931621552
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.04121600091457367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.022702400386333466
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.028932800889015196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,32768,1,0,52.958184814453126
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.020793600380420683
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.02481279969215393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.03916159868240356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,32768,1,0,68.7475830078125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.026471999287605286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.01879200041294098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.015697599947452547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.0493151992559433
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.02282080054283142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.02274879962205887
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.02882719933986664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.03524959981441498
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.03291679918766022
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.022809599339962006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.016847999393939973
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.05163840055465698
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.045311999320983884
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.03300960063934326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.02483679950237274
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.06783679723739625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.016596800088882445
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.022732800245285033
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.02691200077533722
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.09060959815979004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.05081279873847962
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.03207359910011291
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.024852800369262695
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.020785599946975708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.11498559713363647
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.06543200016021729
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.043279999494552614
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.03670560121536255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.030865600705146788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.028948798775672913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.02682879865169525
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.09978399872779846
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.05942080020904541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.1754848003387451
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.03710240125656128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.026902401447296144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.030955201387405394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.02566719949245453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.07394880056381226
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.12309919595718384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.21951839923858643
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.03980799913406372
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.035129600763320924
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.03303839862346649
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.048619198799133304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.20899519920349122
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.11913280487060547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.3856911897659302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.07193120121955872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.046323201060295104
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.039211198687553406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.03576160073280334
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.45221757888793945
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.24859039783477782
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.1417631983757019
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.05586879849433899
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.0864799976348877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.049414399266242984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.044326400756835936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.6506336212158204
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.07341759800910949
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.18991999626159667
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.1144752025604248
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.34353759288787844
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.0514959990978241
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.04731839895248413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.726361608505249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.39003839492797854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.21828479766845704
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.12944960594177246
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.08381279706954955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.9793343544006348
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.05546240210533142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.06208800077438355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.274835205078125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.5047311782836914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.15923839807510376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.0665120005607605
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.09876160025596618
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.059652799367904664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.30524799823760984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.17714240550994872
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,1.0437007904052735
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.5503119945526123
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.1129871964454651
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.07668799757957459
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.06782400012016296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.48177762031555177
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.911736011505127
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.26917281150817873
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.10724320411682128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.08094559907913208
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.1620911955833435
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.8008367538452148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.8077775955200195
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.5067520141601562
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.9385168075561523
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.28591361045837405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.11879199743270874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.09090880155563355
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.17512799501419068
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.15395679473876953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.23670239448547364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.40535359382629393
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.7445104122161865
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.923347282409668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.10284479856491088
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.4437264442443847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.4052543640136719
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.7469967842102051
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.41955838203430174
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.16461440324783325
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.24919838905334474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.7081008911132813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.11342079639434814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.7613344192504883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.4281519889831543
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.4433888435363769
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.880411148071289
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.2572767972946167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,5.995355224609375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.17575360536575318
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.7351119995117188
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.346348762512207
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.583371162414551
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.26416161060333254
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.4254176139831543
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.18417439460754395
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,5.147409439086914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.6761375904083252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.2587104082107544
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.39164319038391116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,2.434161567687988
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.227344036102295
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,4.9607280731201175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.3905055999755859
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,1.1296352386474608
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,2.102657508850098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.6375984191894531
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,9.919500732421875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.2651695966720581
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,8.379431915283202
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,4.144472122192383
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.8262128829956055
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.5527279853820801
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,0.9730463981628418
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,3.6651153564453125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,7.530840301513672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.350927996635437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,3.0540719985961915
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,1.599955177307129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,0.8907312393188477
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.3509887933731079
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,6.203718566894532
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.5334991931915283
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,14.944020080566407
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.7462128162384033
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.5452064514160155
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,12.450977325439453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,5.238268661499023
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.3369327545166017
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.4521279811859131
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,10.587691497802734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.694212818145752
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,1.1781007766723632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,2.1551647186279297
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,4.0963390350341795
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.43783202171325686
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,8.567566680908204
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,21.1337646484375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,2.2072463989257813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.20863037109375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,17.153535461425783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,4.492675018310547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,9.008417510986328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,0.709228801727295
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,18.18671112060547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,3.596150588989258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,1.8711519241333008
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,1.0713024139404297
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,7.162115478515625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1,1,0,0.03666560053825378
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1,2,0,0.027935999631881713
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1,4,0,0.01892160028219223
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.6625487804412842
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,14.370176696777344
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1,32,0,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1,64,0,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1,16,0,0.01456640064716339
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1,8,0,0.016641600430011748
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1,1,0,0.041228801012039185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1,4,0,0.024876800179481507
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1,8,0,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1,16,0,0.02271520048379898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1,2,0,0.03297759890556336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1,32,0,0.021836799383163453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,36.27616577148437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1,64,0,0.022833600640296936
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.0411327987909317
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.028543999791145323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.020638400316238405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.0147024005651474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,29.328475952148438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.014537599682807923
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.050651198625564574
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.03581439852714539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.027492800354957582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.020763200521469117
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.02269279956817627
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.04694559872150421
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.030905601382255555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.022705599665641785
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.016627199947834015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.02250239998102188
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.057596802711486816
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.016735999286174773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.04111840128898621
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.02486560046672821
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.02924000024795532
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.022753599286079406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.02075680047273636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.06198400259017944
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.022819200158119203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.01868640035390854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.03883520066738129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.014630399644374847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.07616320252418518
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.049726399779319766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.026876801252365114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.03524320125579834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.025756800174713136
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.024745599925518037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.05291039943695068
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.09212160110473633
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.033004799485206605
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.02279839962720871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.022809599339962006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.019916799664497376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.11855360269546508
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.03304319977760315
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.04525760114192963
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.07012320160865784
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.026819199323654175
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.02895039916038513
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.023737600445747374
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.09079840183258056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.053504002094268796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.032969599962234496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.026862400770187377
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.16184959411621094
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.11748000383377075
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.06775519847869874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.04529919922351837
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.03510879874229431
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.02258560061454773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.030982398986816408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.028998398780822755
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.2040463924407959
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.1787791967391968
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.32992479801177976
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.1029360055923462
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.06170079708099365
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.03365600109100342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.030947199463844298
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.22257120609283448
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.07647519707679748
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.12540800571441652
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.05140479803085327
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.40494399070739745
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.04329920113086701
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.037227201461791995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.7530543804168701
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.07867199778556824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.21335198879241943
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.12533600330352784
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.39115519523620607
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.04536960124969482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.45920801162719727
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.14823039770126342
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.2543519973754883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.09448000192642211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.8683135986328125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.05562080144882202
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.06424800157546998
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.05202239751815796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.350598406791687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.19942400455474854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.2907999992370605
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.12139359712600709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.08035359978675842
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,0.6595952033996582
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.06055520176887512
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.2285520076751709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.7366928100585938
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.4003727912902832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.09453759789466858
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.13888800144195557
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.4131775856018067
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.07397760152816772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.5185376167297363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.28736319541931155
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.9415695190429687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.9867183685302734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.1704319953918457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.11121599674224854
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07832000255584717
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.5676271915435791
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.32020480632781984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.19275519847869874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,1.058448028564453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.12912800312042236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,2.0423391342163084
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.09041759967803956
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.9362144470214844
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.4976463794708252
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.812481689453125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.1777135968208313
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.28659520149230955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.12563040256500244
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,3.6358081817626955
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.9585087776184082
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.30970718860626223
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.198580801486969
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.5292816162109375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.13982720375061036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,3.545948791503906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.8222688674926757
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.7738319873809815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.4673104286193848
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.42828001976013186
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.2583312034606934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.918132781982422
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.17746880054473876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,5.9630897521972654
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.44670400619506834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.7780496120452881
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.4310848236083984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.740715217590332
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.277678394317627
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.19387840032577514
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,5.42470703125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.491652774810791
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.46099200248718264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.875912094116211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.2918272018432617
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.8024191856384277
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,5.9652862548828125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.7762239933013916
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.6433759689331056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.46886401176452636
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.3921520233154296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.30842080116271975
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,12.055528259277343
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,5.217716979980469
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,2.4442960739135744
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,5.0443359375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.2740832328796388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,10.401976013183594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.7151984214782715
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.4412496089935303
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,10.01910858154297
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,1.1871071815490724
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,4.187212753295898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,2.166326332092285
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.6956624031066895
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1,1,0,0.055550402402877806
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.44695358276367186
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1,4,0,0.024804799258708952
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1,2,0,0.035068801045417784
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,20.15358123779297
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,8.390918731689453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1,8,0,0.02070080041885376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1,16,0,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1,64,0,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1,1,0,0.0635968029499054
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1,32,0,0.014628799259662628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,17.00819091796875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1,8,0,0.026855999231338502
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1,16,0,0.021247999370098115
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1,32,0,0.020766399800777435
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1,64,0,0.020870399475097657
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1,2,0,0.041366401314735415
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1,4,0,0.030931198596954347
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.06754400134086609
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.019780799746513367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.0288783997297287
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.014558400213718414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.04124319851398468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.05077599883079529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.035051199793815616
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.028889599442481994
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.023472000658512116
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.07855039834976196
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.022843199968338012
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.04756160080432892
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.022811199724674224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.08115199804306031
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.03169119954109192
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.01465280055999756
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.020755200088024138
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.016832000017166136
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.09487040042877197
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.030956798791885377
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.026895999908447266
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.02481440007686615
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.041140800714492796
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05786399841308594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.11230560541152954
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.06133120059967041
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.03919999897480011
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.02276639938354492
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.02062239944934845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.01666080057621002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.01595360040664673
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.13783520460128784
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.026851201057434083
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.07699360251426697
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.05047839879989624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.0269679993391037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.02680160105228424
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.03594880104064942
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.16809760332107543
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.05523520112037659
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.035062399506568906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.024799999594688416
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.020744000375270844
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.0930783987045288
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.21622560024261475
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.12054719924926757
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.01857919991016388
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.045332801342010495
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.07013279795646668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.033220800757408145
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.028883200883865357
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.30355520248413087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.16260160207748414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.026899200677871705
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.03521920144557953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.05554559826850891
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.026787200570106508
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.09448000192642211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.38041760921478274
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.2067728042602539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.028961598873138428
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.07122560143470764
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.04944320023059845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.04118559956550598
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.12074400186538696
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.03703519999980927
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.63962721824646
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.3363840103149414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.10727039575576783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.06698560118675231
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.04532319903373718
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.18498879671096802
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.41103520393371584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.22872159481048585
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.03914560079574585
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.7778575897216797
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.13271839618682862
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.08245760202407837
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.05771200060844421
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.1370144009590149
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.09104639887809754
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.0514303982257843
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.7605423927307129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.489345645904541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.22582559585571288
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.4021359920501709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.06366400122642517
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.26900639533996584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.8828880310058593
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.6956735610961915
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.11007519960403442
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.16363199949264526
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.07813439965248108
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.47223677635192873
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.590990447998047
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.09842560291290284
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.21587519645690917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.6774159908294678
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.13868319988250732
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.2974160194396973
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.3671103954315186
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.782699203491211
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.16098400354385375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.4353903770446776
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.4221519947052002
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.760097599029541
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.25194880962371824
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.11796799898147584
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.997043228149414
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,1.0125184059143066
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.5418447971343994
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.307422399520874
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.1933568000793457
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.13552479743957518
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.9684272766113282
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.34684319496154786
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,1.0878560066223144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.22086238861083984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.5941840171813965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.1568719983100891
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,2.074046325683594
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,4.026996612548828
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.9615632057189941
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.3190880060195923
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.8497568130493165
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,3.6577056884765624
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.2138592004776001
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.532364797592163
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,7.636609649658203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,1.0043744087219237
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.862615966796875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.5710815906524658
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,3.580419158935547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.24041919708251952
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.35418879985809326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,7.140386962890625
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.502347183227539
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,2.930860710144043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.8091440200805664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.4728720188140869
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,6.077679824829102
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.30562241077423097
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.8327808380126953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.4907103538513184
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,2.7972175598144533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,5.472196960449219
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.5035823822021485
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,12.281597137451172
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1,1,0,0.10144000053405762
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1,2,0,0.056657600402832034
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1,16,0,0.01932799965143204
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.335315203666687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1,8,0,0.02481119930744171
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,10.970868682861328
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1,4,0,0.03513120114803314
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1,64,0,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1,2,0,0.06172639727592468
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1,1,0,0.10393279790878296
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1,32,0,0.016655999422073364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1,4,0,0.04190559983253479
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1,8,0,0.03094240128993988
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1,64,0,0.022870400547981264
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.12154239416122437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.07009599804878235
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1,32,0,0.02473759949207306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1,16,0,0.025900799036026
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.04127840101718903
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.02074880003929138
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.016359999775886536
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.028948798775672913
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.13500159978866577
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.07726560235023498
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.028303998708724975
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.05133439898490906
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.03505280017852783
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.022812800109386445
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.08358399868011475
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.04850560128688812
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.1515391945838928
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.032304000854492185
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.1728495955467224
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.09474400281906128
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.022679999470710754
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.05967519879341125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.04120480120182037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.018671999871730804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.024966399371623992
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.21166560649871827
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.11409280300140381
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.06260640025138856
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.04014239907264709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.018593600392341612
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.25490078926086424
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.026929599046707154
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.1402783989906311
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.020732800662517547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.0389519989490509
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.07948319911956787
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.028881600499153136
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.026892799139022826
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.17239199876785277
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.3197983980178833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.05196639895439148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.058406400680541995
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.037027201056480406
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.09680320024490356
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.026843199133872987
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.4052591800689697
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.122707200050354
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.07411680221557618
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.049318400025367734
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.03329119980335236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.21768798828125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.5797616004943847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.30824639797210696
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.03712320029735565
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.1711359977722168
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.10061119794845581
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.060873597860336304
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.04117920100688934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.3866463899612427
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.7308991909027099
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.035099199414253233
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.0554639995098114
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.12531839609146117
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.21348960399627687
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.04737760126590729
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.2612095832824708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.07838720083236694
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.6484399795532226
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.19650720357894896
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.11923999786376953
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.34719200134277345
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.05607039928436279
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.0786512017250061
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.24257440567016603
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.14817440509796143
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.09889919757843017
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.5234432220458984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.7910687923431396
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.427396821975708
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.07189760208129883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.7832223892211914
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.42609119415283203
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.998446464538574
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.5057439804077148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.2482896089553833
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.2954256057739258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.16005120277404786
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.19291199445724488
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.7300079345703125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.11395519971847534
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.5023375988006592
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.910041618347168
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,3.358350372314453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.13694560527801514
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.6359296798706056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.7087567806243896
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.4020991802215576
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.3316720008850098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.17331520318984986
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.2499567985534668
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,5.316702270507813
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.8013711929321289
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.4797504425048829
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.46331038475036623
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,5.511579132080078
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.830718421936035
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.29320321083068845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.20528960227966309
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,1.052401638031006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.23874239921569823
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.9883104324340821
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.5818672180175781
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,4.044150543212891
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.35303521156311035
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.6486080169677735
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.27828640937805177
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,2.12938232421875
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,4.11732177734375
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.4033040046691895
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,1.145899200439453
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,8.198046112060547
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1,1,0,0.18126720190048218
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1,2,0,0.10460959672927857
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,8.004074859619141
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1,4,0,0.05600320100784302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1,16,0,0.024796800315380098
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1,32,0,0.018681600689888
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1,8,0,0.03472639918327332
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1,1,0,0.18438559770584106
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1,2,0,0.10329600572586059
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1,64,0,0.016568000614643096
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1,8,0,0.043270400166511534
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1,32,0,0.026873600482940675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1,4,0,0.06374559998512268
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.2322000026702881
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1,64,0,0.022801600396633148
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.12344640493392944
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1,16,0,0.033011201024055484
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.04179680049419403
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.06943039894104004
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.2525775909423828
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.02884959876537323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.0786736011505127
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.1338528037071228
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.035113599896430966
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.027014398574829103
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.05140960216522217
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.28650240898132323
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.15301120281219482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.03322240114212036
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.023001599311828613
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.04956159889698029
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.08386719822883607
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.17284320592880248
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.3205168008804321
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.09775040149688721
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.06170880198478699
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.02688960134983063
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.03303999900817871
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.21243040561676024
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.11766719818115234
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.04329439997673035
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.06660320162773133
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.40083680152893064
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.04338400065898895
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.03091520071029663
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.023263999819755556
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.2571984052658081
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.4811071872711182
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.08168320059776306
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.041233599185943604
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.032927998900413515
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.1419600009918213
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.05550240278244019
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.10211520195007324
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.06373119950294495
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.6216527938842773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.17742719650268554
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.04320319890975952
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.3262736082077026
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.22516160011291503
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.03300960063934326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.08153759837150573
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.41136322021484373
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.12934720516204834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.7809696197509766
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.055851197242736815
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.3177871942520142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,1.144923210144043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.18079040050506592
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.1102720022201538
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.04526079893112182
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.05340480208396912
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.07250400185585022
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5924111843109131
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.4028336048126221
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.430732822418213
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.7474688053131103
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.09478880167007446
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.22806239128112793
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.13956960439682006
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.36933441162109376
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.21929600238800048
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6709119796752929
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.14086560010910035
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.06980640292167664
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.10266720056533814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.2814928054809571
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.497732734680176
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.45391201972961426
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.17656639814376832
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.8206111907958984
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.5510687828063965
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.12745280265808107
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,3.0103504180908205
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.2691967964172363
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.8264608383178711
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.29395198822021484
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.5502448081970215
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.2048111915588379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.47059998512268064
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,3.0266048431396486
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,6.01295051574707
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.5581776142120362
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.7871583938598632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.9668335914611816
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.3537247896194458
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,6.657084655761719
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,3.4144126892089846
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.24826719760894775
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1,2,0,0.18149759769439697
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1,16,0,0.03501439988613129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1,8,0,0.055902397632598876
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1,64,0,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,1,1,0,0.34970879554748535
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1,32,0,0.024849599599838255
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,1,1,0,0.3488879919052124
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1,2,0,0.18566399812698364
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1,4,0,0.09779840111732482
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1,4,0,0.10357919931411744
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1,64,0,0.024831999838352204
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.4406239986419678
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1,8,0,0.0627888023853302
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.12433439493179321
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.07120959758758545
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1,16,0,0.041340801119804385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.2314591884613037
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.043305599689483644
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1,32,0,0.03274720013141632
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.4783472061157227
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.2531248092651367
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.022761599719524385
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.028908801078796387
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.053513598442077634
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.13768960237503053
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.030876800417900085
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.5547152042388916
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.2897104024887085
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.08033279776573181
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.03711200058460236
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.08560640215873719
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.037108799815177916
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.02677280008792877
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.15602560043334962
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.05256320238113403
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.612007999420166
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09916960000991822
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.32325279712677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.047279998660087585
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.1773535966873169
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.40648798942565917
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.7838880062103272
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.03712640106678009
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.2176111936569214
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.06261439919471741
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.0710752010345459
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.12327040433883667
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.03705280125141144
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.04921280145645142
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.26350080966949463
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.08997439742088317
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.9341423988342286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.48778719902038575
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.061806398630142215
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.047305598855018616
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.14945759773254394
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.6341616153717041
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.2188096046447754
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.3364448070526123
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.055497598648071286
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.11376479864120484
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.0765936017036438
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.18873440027236937
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.7945775985717773
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.23990240097045898
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.4246352195739746
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.14398720264434814
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.5309647560119628
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.09649279713630676
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.07199199795722962
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.1650303840637206
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.6141632080078125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.3402496099472046
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,2.2650335311889647
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.20292000770568847
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09456639885902404
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.1334223985671997
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.8260000228881834
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.7720111846923828
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.42833762168884276
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.1690719962120056
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.4556991577148437
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.1228767991065979
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.257423996925354
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.18481760025024413
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.4126160144805908
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.2624815940856934
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.3248271942138672
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.5466127395629883
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.7145167827606201
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,5.0496368408203125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.8756768226623535
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.6067567825317384
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.5087200164794922
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,3.070670318603516
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.3270816087722778
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,5.9714111328125
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,1,2,0,0.3473407983779907
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,1,4,0,0.18223040103912352
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.23305120468139648
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,1,8,0,0.10032960176467895
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,1,1,0,0.7030672073364258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,1,16,0,0.056803202629089354
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,1,32,0,0.0352400004863739
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,1,1,0,0.6758016109466553
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,1,8,0,0.10549759864807129
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,1,2,0,0.34923200607299804
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,1,64,0,0.024846400320529937
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,1,16,0,0.06310240030288697
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,1,32,0,0.042247998714447024
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,1,4,0,0.18650079965591432
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,1,64,0,0.030964800715446474
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.864475154876709
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.44267997741699217
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.1263919949531555
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.0726527988910675
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.23794240951538087
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.03257760107517242
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.04734880030155182
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.9275936126708985
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.25674240589141845
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.480511999130249
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.1393728017807007
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.08328959941864014
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.041223999857902524
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.056815999746322635
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.5606112003326416
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.29380319118499754
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.16245280504226683
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.0926688015460968
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.05818399786949158
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,1.0910351753234864
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.043252798914909366
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.10885599851608277
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.07002879977226258
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.3306591987609863
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.6178847789764405
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.1961376190185546
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.18456000089645386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.05284799933433533
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.7936783790588379
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.4180592060089111
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.22948319911956788
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.550062370300293
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.13352160453796386
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.08598880171775818
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.2777807950973511
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.9470512390136718
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.10610560178756714
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.059564799070358276
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.1639583945274353
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.8376432418823243
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.07605760097503662
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.502627182006836
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.2428671836853027
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.6521440029144288
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.21001598834991456
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.4214799880981444
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.3576240062713623
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.09784640073776245
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.13553119897842408
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.45319042205810545
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.8232288360595703
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.5607279777526855
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.26686079502105714
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.12731679677963256
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,3.0282623291015627
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.17216160297393798
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.2096176147460938
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.6604735851287842
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.318507194519043
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.24621760845184326
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.38374719619750974
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,4.528467178344727
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.17787840366363525
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.4835663795471191
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.8308671951293946
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.8780879974365234
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.31316800117492677
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,5.614902496337891
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.2254512071609497
SGLang,0.5.10,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.5210399627685547
