framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1,2,0,0.012598399817943574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1,1,0,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1,8,0,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1,64,0,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1,4,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1,16,0,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1,1,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1,32,0,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1,2,0,0.02062560021877289
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1,8,0,0.020619200170040132
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1,4,0,0.018611200153827667
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1,16,0,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.013643200695514678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1,64,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.014401599764823914
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.0144896000623703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1,32,0,0.019539199769496918
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.022694399952888487
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.020670400559902193
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.019388799369335175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.02067999988794327
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.014460800588130951
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.02269279956817627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.020764799416065217
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.014451199769973755
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.020644800364971162
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.022673599421977997
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.01523360013961792
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.022697600722312927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.02481600046157837
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.014497600495815277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.02267040014266968
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.022726400196552275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.022788800299167633
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.02274720072746277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.01653439998626709
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.016673600673675536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.014985600113868713
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.014614400267601014
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.024809600412845613
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.014726400375366211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.022961600124835967
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.023425599932670592
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.024750399589538574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.026859200000762938
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.022707200050354003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.02609120011329651
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.020667199790477753
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.0330128014087677
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.018680000305175783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.030876800417900085
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.028863999247550964
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.016676799952983858
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.026907199621200563
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.024859200417995452
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.025036799907684325
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.028880000114440918
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.024747200310230255
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.03500800132751465
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.02075359970331192
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.020745599269866945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.03705599904060364
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.022593599557876588
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.04531359970569611
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.03300319910049439
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.022804799675941467
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.030859199166297913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.028905600309371948
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.037092798948287965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.04299359917640686
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.030905601382255555
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.06789119839668274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.02890239953994751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.03094879984855652
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.02890239953994751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.02890399992465973
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.08229600191116333
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.0329263985157013
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.03641439974308014
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.037031999230384825
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.05221760272979736
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.06656960248947144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.043844801187515256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.047332799434661864
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.04141919910907745
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.035006400942802426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.03913759887218475
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.10658080577850342
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.0369951993227005
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.07604479789733887
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.05551999807357788
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.0432671993970871
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.12156959772109985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.04746719896793365
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03712640106678009
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.15070559978485107
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.04317440092563629
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.09057440161705017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.057580798864364624
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.04509280025959015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.047295999526977536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.1022271990776062
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.043303999304771426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.16835520267486573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.051444798707962036
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.06699519753456115
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.05756480097770691
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.04531520009040833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.05184320211410522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.04938240051269531
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.2540015935897827
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.09562879800796509
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06819199919700622
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05963199734687805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.05952960252761841
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.14983680248260497
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.04945439994335175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.16005120277404786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.10284320116043091
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.06373599767684937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.06980320215225219
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.07549600005149841
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.2190095901489258
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.3882080078125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.0642799973487854
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.2715023994445801
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.061694401502609256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.08690239787101746
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.13649920225143433
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.07822080254554749
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.07394400238990784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.07311999797821045
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.09204800128936767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.3972383975982666
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.07807360291481018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.07961279749870301
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.14382079839706421
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.7369616031646729
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.08287039995193482
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.39795360565185545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.22871999740600585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.23067679405212402
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.1505568027496338
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.11299840211868287
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.10311679840087891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.39218080043792725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.1068303942680359
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.15146080255508423
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.11494560241699218
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.7032303810119629
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.10675359964370727
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.10469759702682495
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.23094398975372316
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.2224735975265503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.3585903882980347
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.13562239408493043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.6377056121826172
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.1460271954536438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,1.1864687919616699
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.34650559425354005
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.595147180557251
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.22094399929046632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.1318063974380493
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.14626879692077638
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,1.0866975784301758
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.504423999786377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.9179183959960937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.7518495559692382
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.19204479455947876
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.13133440017700196
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.13566720485687256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.16207040548324586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.1671887993812561
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.3066672086715698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.47751197814941404
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.29493439197540283
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.8372336387634277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.18967039585113527
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.5498016357421875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.16447839736938477
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.2616496086120605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.6900224208831787
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.15992480516433716
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.3961472034454346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,2.4682464599609375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.19915839433670043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.3750960111618042
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.2563904047012329
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,1.117420768737793
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,2.086129570007324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.19121600389480592
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.19334080219268798
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.6280128002166748
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.24790239334106445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.1844480037689209
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.39585280418395996
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,2.108415985107422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2518944025039673
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,1.1231488227844237
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.26219360828399657
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.6320608139038086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,4.247415924072266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,3.419790267944336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.5788032054901123
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.3721359968185425
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.25061759948730467
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,1.7910367965698242
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,0.9859807968139649
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32768,16,0,1.1869071960449218
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32768,8,0,2.1155231475830076
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,32768,64,0,0.4946688175201416
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.23987839221954346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32768,4,0,4.257400131225586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32768,32,0,0.7307631969451904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32768,2,0,8.31427230834961
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32768,4,0,3.2005313873291015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32768,8,0,1.742919921875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32768,32,0,0.6622608184814454
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32768,16,0,1.0265983581542968
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32768,2,0,6.275408172607422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,32768,64,0,0.45926718711853026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1,1,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1,2,0,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1,4,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1,8,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32768,1,0,16.518971252441407
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32768,1,0,12.661879730224609
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1,32,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1,2,0,0.02273920029401779
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1,16,0,0.013156799972057343
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1,64,0,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1,1,0,0.02274879962205887
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1,4,0,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1,32,0,0.020657600462436677
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1,64,0,0.022699199616909027
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1,8,0,0.02062239944934845
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.014611199498176575
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1,16,0,0.02067999988794327
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.016502399742603303
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.0208064004778862
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.02272160053253174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.014468799531459808
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.021550400555133818
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.020689600706100465
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.014523200690746307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.015352000296115876
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.022703999280929567
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.02157920002937317
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.024846400320529937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.022787199914455415
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.020732800662517547
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.016638399660587312
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.022819200158119203
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.014475199580192565
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.026902401447296144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.022782400250434875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.024822400510311128
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.021116800606250763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.022811199724674224
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.017313599586486816
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.03094559907913208
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.02269600033760071
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.024798400700092316
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.022785599529743194
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.02268480062484741
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.026764801144599913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.02067999988794327
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.022707200050354003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.030934399366378783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.018806399405002595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.0186831995844841
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.030747199058532716
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.043222400546073916
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.028523200750350954
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.02686080038547516
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.03507040143013
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.05795360207557678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.03505760133266449
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.026791998744010927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.02884640097618103
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.022724799811840057
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.022780799865722658
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.07192800045013428
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.047275200486183167
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.033004799485206605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.03712640106678009
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.022759999334812164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.11622719764709473
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.03102239966392517
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.06974719762802124
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.04327200055122375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.030857598781585692
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.03709760010242462
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.030990400910377504
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.030958399176597595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.13832319974899293
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.08269439935684204
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.05400000214576721
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.03309600055217743
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.04529280066490173
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.0391728013753891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.037038400769233704
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.18656959533691406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.11009119749069214
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.06805920004844665
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.04360159933567047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.04111840128898621
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.04123679995536804
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.04799680113792419
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.039201599359512326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.2138223886489868
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.12399040460586548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.07969440221786499
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.049374398589134214
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.05772960186004639
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.15336159467697144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.26879680156707764
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.093886399269104
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.06011199951171875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.04654879868030548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.05351520180702209
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.0473471999168396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.04908480048179627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.04733920097351074
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.2982975959777832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.17152960300445558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.10599039793014527
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.06082080006599426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.05346879959106445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.06791999936103821
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.4703839778900146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.1552288055419922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.2592655897140503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.09999359846115112
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.05552800297737122
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.07280640006065368
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.06783519983291626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.495136022567749
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.27774720191955565
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.16636799573898314
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.07394559979438782
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.06380000114440917
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.07011359930038452
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.10777759552001953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.08052480220794678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.39427039623260496
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.22492001056671143
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.728275203704834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.09127680063247681
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.14195200204849243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.08467360138893128
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.0818880021572113
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.23322560787200927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.40552639961242676
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.0991807997226715
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.149017596244812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.08624640107154846
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.09032480120658874
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.7330992221832275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.411228847503662
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.23987040519714356
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.410478401184082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.7471439838409424
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.1583184003829956
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.12190239429473877
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.1132207989692688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.40288481712341306
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.24229280948638915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.7122943878173829
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.16331199407577515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.11893279552459717
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.12514400482177734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.3268560409545898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,2.3261648178100587
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.6505424022674561
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.23648159503936766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.1938336372375489
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.15701440572738648
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.36856319904327395
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.1486143946647644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.3607503890991211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.15997600555419922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.15180799961090088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.23642559051513673
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,1.10164155960083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,3.5959182739257813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,2.0760208129882813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.6131599903106689
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.3208688020706177
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,0.9376735687255859
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.5219776153564453
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.7791648864746095
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.20620479583740234
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.31405279636383054
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,0.8577695846557617
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.18125120401382447
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.20612640380859376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.49700322151184084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.5726240158081055
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.1834336042404175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,2.999294471740723
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.41581120491027834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.7106175899505616
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,2.4465919494628907
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.2741920471191406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,4.987838363647461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.21539199352264404
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.27350080013275146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,1.1416128158569336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,2.110972785949707
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.21325600147247314
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.39803519248962405
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.6511168003082275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.26876320838928225
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,4.129401779174804
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,2.1483280181884767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,1.152449607849121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.6573423862457275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,4.300804901123047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.41871042251586915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.28546719551086425
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,8.902983856201171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,1.0169391632080078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.2810512065887451
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.40194239616394045
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,1.8199888229370118
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,3.4575008392333983
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.6071360111236572
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,6.917212677001953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32768,32,0,1.2325759887695313
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,32768,64,0,0.7772784233093262
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32768,8,0,4.173896026611328
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32768,16,0,2.156056022644043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32768,4,0,8.21072006225586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32768,16,0,1.80426082611084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32768,8,0,3.2327232360839844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32768,4,0,6.329708862304687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32768,32,0,1.085092830657959
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,32768,64,0,0.7172736167907715
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32768,2,0,16.582553100585937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1,1,0,0.022920000553131103
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1,4,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32768,2,0,12.779558563232422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1,2,0,0.016518400609493257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1,32,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1,16,0,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1,64,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1,4,0,0.020729599893093108
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1,1,0,0.026441600918769837
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1,8,0,0.022793599963188173
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1,2,0,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1,8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1,16,0,0.022804799675941467
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1,32,0,0.02077440023422241
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1,64,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.01871200054883957
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.01650719940662384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32768,1,0,33.665402221679685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.014468799531459808
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32768,1,0,25.481159973144532
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.014480000734329224
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.014470399916172027
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.027379199862480164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.022780799865722658
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.021721599996089934
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.02078399956226349
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.02279520034790039
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.022801600396633148
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.02104319930076599
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.02892639935016632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.022678400576114654
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.022804799675941467
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.0248879998922348
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.02271360009908676
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.018588800728321076
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.024795199930667877
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.022755199670791627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.01631679981946945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.03506560027599335
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.01459999978542328
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.02470880001783371
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.024804799258708952
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.022707200050354003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.020688000321388244
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.03251200020313263
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.022697600722312927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.01637440025806427
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.018760000169277192
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.04530239999294281
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.03301759958267212
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.026888000965118408
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.01656640022993088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.02280319929122925
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.024750399589538574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.024798400700092316
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.05141440033912659
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.03292959928512573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.022753599286079406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.026830399036407472
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.020633600652217865
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.020662400126457214
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.018680000305175783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.06462879776954651
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.04314720034599304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.035020801424980166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.02890399992465973
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.030876800417900085
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.09893760085105896
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.0284527987241745
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.058824002742767334
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.03701600134372711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.030969598889350893
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.024743999540805816
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.12320640087127685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.025358399748802184
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.07341759800910949
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.04939199984073639
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.03508639931678772
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.03306559920310974
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.20779359340667725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.07235360145568848
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.04702720046043396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.039155200123786926
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.03299199938774109
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.11971999406814575
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.039166399836540224
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.037049600481987
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.03473759889602661
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.24704480171203613
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.08549759984016418
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.14193439483642578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.049377599358558656
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.05596960186958313
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.04323999881744385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.04314720034599304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.19100960493087768
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.3412544012069702
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.11400959491729737
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.05348640084266663
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.07199839949607849
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.04740320146083832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.04529759883880615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.21784160137176514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.13019200563430786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.3890176057815552
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.0622111976146698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.08421120047569275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.05344480276107788
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.05560799837112427
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.5013328075408936
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.273638391494751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.15921759605407715
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.06616320013999939
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.10009759664535522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.059595197439193726
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.055478399991989134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.17701120376586915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.3035775899887085
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.11148960590362549
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.5502912044525147
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.07626399993896485
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.0680895984172821
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.06270080208778381
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.9170175552368164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.2685728073120117
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.08106080293655396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.07600319981575013
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.10831680297851562
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,0.93787841796875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.4803215980529785
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.16077280044555664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.17581919431686402
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.11899839639663697
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.5074960231781006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.2878927946090698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.09156320095062256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.08420640230178833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.4039680004119873
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.23649919033050537
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.1043280005455017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.09652799963951111
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.1540511965751648
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.4207072257995605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.406875228881836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.7451119899749756
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.4175055980682373
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.750113582611084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.24993278980255126
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.16430720090866088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.1141152024269104
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.2561503887176514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.1047104001045227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.1771216034889221
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.4261888027191162
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.8491231918334963
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.7642255783081054
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.4382047653198242
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.13836480379104615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.18350080251693726
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.42522878646850587
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.7345856189727783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.580606460571289
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.14588479995727538
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.2646703958511353
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.351153564453125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,4.766136169433594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.6678256034851074
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.2250816345214843
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,2.38939208984375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.25910239219665526
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.17947360277175903
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.39187839031219485
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.6390495777130127
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,1.130617618560791
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.26485600471496584
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.18934400081634523
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,4.099391937255859
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.3910576105117798
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,2.1111824035644533
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.5504640102386474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,0.9683407783508301
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.7862512588500976
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,3.577017593383789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.3517920017242432
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.2342303991317749
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,7.385662078857422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.3515791893005371
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.5343408107757568
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,3.0207727432250975
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.24368960857391359
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.8951456069946289
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,6.032521438598633
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.6015119552612305
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.7379424095153808
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.452891206741333
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.4849679946899412
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.3220848083496093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,5.057447814941407
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.3100303888320923
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,10.343334197998047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,2.154867172241211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.6937024116516113
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,1.1823200225830077
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.3135839939117432
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,4.131379318237305
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.4413919925689697
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,8.303603363037109
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,1.1893872261047362
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,2.1693359375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,4.4172607421875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.7050079822540283
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.4707920074462891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,8.854203033447266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,3.4896846771240235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.6667664051055908
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,1.0761664390563965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,1.875004768371582
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,6.940777587890625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1,1,0,0.025740799307823182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1,2,0,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.45939998626708983
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1,4,0,0.014620800316333771
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,17.8877197265625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1,32,0,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1,8,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1,64,0,0.013264000415802002
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1,1,0,0.032995200157165526
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1,2,0,0.026899200677871705
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1,16,0,0.014460800588130951
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,13.939219665527343
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1,8,0,0.020707200467586517
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1,64,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1,32,0,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1,16,0,0.02282879948616028
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.026790401339530943
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1,4,0,0.022681599855422972
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.020627200603485107
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.014523200690746307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.034944000840187076
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.028947201371192933
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.022835199534893037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.030961599946022034
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.02473919987678528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.02141759991645813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.02473919987678528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.039155200123786926
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.02088160067796707
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.022787199914455415
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.02271360009908676
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.03765760064125061
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.030884799361228944
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.026767998933792114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.014609600603580474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.018592000007629395
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.05008959770202637
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.0268528014421463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.03506399989128113
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.024817599356174468
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.02483679950237274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.022806400060653688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.0534991979598999
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.03302719891071319
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.022710399329662324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.016518400609493257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.018702399730682374
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.06987040042877198
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.031718400120735166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.04526079893112182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.028828799724578857
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.026137599349021913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.022961600124835967
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.09221119880676269
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.052060800790786746
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.03299199938774109
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.022793599963188173
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.02876800000667572
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.02091200053691864
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.11550079584121704
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.04531520009040833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.06782079935073852
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.037150400876998904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.02895039916038513
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.030937600135803222
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.028915199637413024
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.1787503957748413
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.10178560018539429
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.061710399389266965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.028833600878715514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.030888000130653383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.12683520317077637
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.22102880477905273
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.04103519916534424
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.03462879955768585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.0778223991394043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.05132799744606018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.043227198719978335
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.0371535986661911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.037150400876998904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.38982400894165037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07734720110893249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.051976001262664794
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.21325600147247314
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.12457760572433471
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.04529759883880615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.04318560063838959
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.4577631950378418
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.1493824005126953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.2546688079833984
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.09408959746360779
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.051367998123168945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.05552639961242676
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.06375679969787598
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.6619152069091797
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.1993615984916687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.3501904010772705
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.08209279775619507
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.12076959609985352
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.061689597368240354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.05666720271110535
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.23006720542907716
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.40082077980041503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.14058079719543456
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.7394527912139892
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.09675520062446594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.07396320104599
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.06575999855995178
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.9824848175048828
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.5144256114959717
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.17120640277862548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.28639199733734133
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.0768559992313385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.06985599994659424
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.11274080276489258
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,1.0611328125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.3183568000793457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.19193120002746583
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.5662399768829346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.12729920148849488
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.0902895987033844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.08100640177726745
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.9293791770935058
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.7884256362915039
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.4979663848876953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.28551359176635743
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.1247599959373474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.09699519872665405
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.17906559705734254
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.8244192123413085
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.9609343528747558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.11083999872207642
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.19623199701309205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.528876781463623
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.14288640022277832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.4254511833190918
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.8982032775878905
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.2609407901763916
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.7684319972991943
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.4410240173339843
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.17811520099639894
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.31010239124298095
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.7767727851867676
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.4461215972900391
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.12683520317077637
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.2792655944824219
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.7508800506591795
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.4325535774230957
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.14174560308456421
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.19436320066452026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.7905471801757813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,2.844924736022949
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.4788703918457031
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.2930095911026001
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.45953922271728515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.21069118976593018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,5.842582321166992
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.3087968111038208
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.4707295894622803
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.2279616117477417
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.3940799713134766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.620159912109375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.7795760154724121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,5.113116836547851
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.2646191596984864
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.30830559730529783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.7105088233947754
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,2.4353872299194337
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.44040961265563966
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,4.880953598022461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,1.1882271766662598
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.44905757904052734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,2.168619155883789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.6969615936279296
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,9.958060455322265
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,4.126335906982422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.3214416027069092
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1,2,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1,1,0,0.03503200113773346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1,4,0,0.0220223993062973
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1,8,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,8.241741180419922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1,64,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1,2,0,0.03299359977245331
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1,4,0,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1,1,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1,32,0,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1,8,0,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1,32,0,0.022572800517082214
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1,16,0,0.014507199823856353
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1,64,0,0.022651199996471406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.04103359878063202
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.020744000375270844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1,16,0,0.022675199806690215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.014478400349617004
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.04952479898929596
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.03540480136871338
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.028921601176261903
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.02484000027179718
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.022676800191402436
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.02269600033760071
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.020785599946975708
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.04812000095844269
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.014864000678062438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.03278239965438843
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.030379199981689455
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.04116480052471161
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.05777119994163513
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.024884800612926482
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.06211360096931458
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.0267984002828598
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.022711999714374542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.07761279940605163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.05049920082092285
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.02792159914970398
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.03503519892692566
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.024726399779319765
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.09305279850959777
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.03508639931678772
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.024822400510311128
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.020713600516319274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.01860480010509491
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.05343040227890015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.017609600722789765
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.12062400579452515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.07004960179328919
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.029657599329948426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.03499679863452911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.026878398656845093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.02683520019054413
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.045300799608230594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.1643664002418518
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.09407680034637451
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.05433760285377502
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.035046398639678955
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.026836800575256347
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.030868801474571227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.11914399862289429
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.07203680276870728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.04747200012207031
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.2073535919189453
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.041201600432395936
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.024779200553894043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.03505919873714447
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.1838863968849182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.10792640447616578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.3342384099960327
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.045311999320983884
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.06779360175132751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.035051199793815616
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.03914560079574585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.034959998726844785
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.22864959239959717
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.41208319664001464
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.057608002424240114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.049379199743270874
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.08304479718208313
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.13377759456634522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.045263999700546266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.7563568115234375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.13767199516296386
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.08970879912376403
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.2252000093460083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.40315680503845214
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.06376799941062927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.057518398761749266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.8822463989257813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.16284960508346558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.10993280410766601
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.07803519964218139
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.06987360119819641
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.26800639629364015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.4730688095092773
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.3021136283874513
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.09856799840927125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.21638879776000977
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.36770238876342776
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.07766240239143371
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.6733344078063965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.7632559776306153
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.1383247971534729
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.4398192405700683
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.42256960868835447
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.2501823902130127
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.16157599687576293
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.09447680115699768
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.11866879463195801
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.30789120197296144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.13351839780807495
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.1920591950416565
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.5364607810974121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09943199753761292
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,1.0019583702087402
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.9538703918457032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,1.0882448196411132
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.347760009765625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5953775882720947
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.22138400077819825
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.15750240087509154
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,2.074295997619629
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.11885759830474854
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.5284016132354736
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.9625344276428223
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.3193664073944092
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.824875259399414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.2129728078842163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,3.6395999908447267
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.569814395904541
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.24112799167633056
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.15881919860839844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,1.0029647827148438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.35337119102478026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.8703327178955078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.580753707885742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.4717088222503662
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.18384159803390504
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.8116368293762207
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,2.904747200012207
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.5023520469665528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.3058320045471191
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.22539520263671875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,5.828548812866211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.33555519580841064
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.4910479545593263
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.5044991970062256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.8349727630615235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.808353614807129
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.24981279373168946
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,5.420329666137695
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1,4,0,0.02483839988708496
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1,1,0,0.0557807981967926
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1,2,0,0.034887999296188354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1,8,0,0.02067520022392273
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1,32,0,0.014971199631690978
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1,16,0,0.015537600219249725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1,1,0,0.06199359893798828
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1,2,0,0.04119200110435486
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1,8,0,0.026940798759460448
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1,64,0,0.013673600554466248
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1,32,0,0.02282239943742752
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1,16,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1,4,0,0.03222079873085022
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1,64,0,0.02078399956226349
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.06835520267486572
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.041176000237464906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.027612799406051637
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.015547199547290802
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.07890880107879639
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.04939360022544861
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.024830399453639983
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.020729599893093108
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.035062399506568906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.08165280222892761
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.022726400196552275
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.02268480062484741
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.03301919996738434
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.014620800316333771
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.09481120109558105
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.04772160053253174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.05882560014724732
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.04118239879608154
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.018555200099945067
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.030934399366378783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.016515199840068818
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.026824000477790832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.02465119957923889
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.11271840333938599
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.06382240056991577
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.028944000601768494
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.022840000689029694
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.041064000129699706
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.018590399622917177
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.13775039911270143
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.07879679799079894
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.02073120027780533
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.053260797262191774
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.02894560098648071
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.037134400010108946
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.02512960135936737
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.17244160175323486
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.0971231997013092
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.03707840144634247
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.02691200077533722
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.05768479704856873
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.02274720072746277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.21962239742279052
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.12168320417404174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.04937280118465424
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.03704000115394592
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.07426720261573791
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.03298560082912445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.030935999751091004
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.3074656009674072
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.16879839897155763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.06139039993286133
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.04144960045814514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.03504799902439117
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.09928479790687561
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.38731200695037843
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.03237600028514862
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.12732160091400146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.07883999943733215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.055587202310562134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.04734239876270294
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.21331520080566407
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.043244799971580504
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.6461904048919678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.11851199865341186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.3463104009628296
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.19693440198898315
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.07958400249481201
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.05757920145988464
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.24263041019439696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.4256303787231445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.14723520278930663
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.7912015914916992
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.06374880075454711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.07192959785461425
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.0513759970664978
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.09917280077934265
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.4255504131317139
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.7791999816894531
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.24826879501342775
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.16028959751129152
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.08424479961395263
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.11287360191345215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.4991616249084472
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.7279903411865234
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.1918768048286438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.2957391977310181
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.9117648124694824
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.502452802658081
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.10868159532546998
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.13668960332870483
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.24902400970458985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.17315839529037474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.4028927803039551
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.7071872234344483
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.1333583950996399
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.3310288429260253
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.5769088745117186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.480246353149414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.46506562232971194
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.8048159599304199
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.829275131225586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.29538400173187257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.2039423942565918
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.1605679988861084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.23946239948272705
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5818751811981201
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,1.0531599998474122
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,1.9734783172607422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.35254559516906736
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.18042880296707153
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,4.096451187133789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.938671875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,1.1454208374023438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.6520192146301269
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,2.1308095932006834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.40430560111999514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.27840960025787354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.2134943962097168
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1,1,0,0.1019279956817627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1,4,0,0.03499679863452911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1,8,0,0.024868799746036528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1,32,0,0.016657599806785585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1,2,0,0.055827200412750244
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1,16,0,0.018904000520706177
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1,64,0,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1,2,0,0.061187201738357545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1,1,0,0.10275520086288452
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1,4,0,0.04304800033569336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1,8,0,0.03118399977684021
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1,16,0,0.02678079903125763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.06906880140304565
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1,64,0,0.02269279956817627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.042027199268341066
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.02890239953994751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1,32,0,0.02486560046672821
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.020630399882793426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.12166880369186402
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.01653759926557541
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.028921601176261903
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.07802079916000366
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.02491360008716583
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.05106080174446106
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.15287679433822632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.037110400199890134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.1341040015220642
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.08271520137786866
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.048816001415252684
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.01857919991016388
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.03496319949626923
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.09677919745445251
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.05942400097846985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.03298079967498779
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.17319999933242797
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.04333760142326355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.026764801144599913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.11731519699096679
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.21214079856872559
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.06620799899101257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.024803200364112855
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.023585599660873414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.03099679946899414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.04318079948425293
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.25797119140625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.14379520416259767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.08216800093650818
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.041238400340080264
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.02272319942712784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.055516797304153445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.031228798627853393
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.32485759258270264
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.17841440439224243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.03094559907913208
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.06371039748191834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.10070719718933105
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.04322560131549835
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.02884480059146881
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.12981280088424682
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.4117231845855713
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.03295679986476898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.08106719851493835
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.22652320861816405
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.05552319884300232
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.039164799451828006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.5918735980987548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.18090080022811889
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.3202336072921753
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.07190399765968322
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.043222400546073916
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.05347679853439331
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.11052960157394409
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.04729439914226532
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.744652795791626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.14041119813919067
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.2274480104446411
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.401364803314209
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.06988319754600525
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.09484959840774536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.6718128204345704
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.2747568130493163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.369214391708374
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.21768479347229003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.10127840042114258
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.06178560256958008
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.14150079488754272
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.08004959821701049
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.5505135536193848
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.4540527820587158
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.27175040245056153
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.17496800422668457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.8225839614868165
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.10096479654312134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.127729594707489
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.29323039054870603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.4711455821990967
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.15696320533752442
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.825169563293457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.5447808265686036
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,2.9904207229614257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.20370080471038818
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.24849600791931153
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.7867023468017578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.5571616172790528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.9684368133544922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1,1,0,0.17953439950942993
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.3529792070388794
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,3.419887924194336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.19237760305404664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1,2,0,0.0969760000705719
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1,4,0,0.05644479990005493
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1,32,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,1,64,0,0.014878399670124054
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1,16,0,0.02476799935102463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1,4,0,0.06250399947166443
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1,2,0,0.10304800271987916
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1,1,0,0.18466880321502685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1,8,0,0.0350847989320755
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1,8,0,0.04197919964790344
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1,16,0,0.033020800352096556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1,32,0,0.02677600085735321
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,1,64,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.23076961040496827
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.12320480346679688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.028918400406837463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.018607999384403228
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.06989120244979859
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.25063519477844237
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.043227198719978335
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.13496799468994142
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.05143839716911316
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.08048160076141357
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.03729600012302399
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.030260801315307617
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.2891871929168701
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.02680320143699646
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.08500159978866577
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.05345600247383118
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.03704800009727478
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.02687999904155731
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.022672000527381896
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.32229440212249755
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.10057920217514038
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.17575039863586425
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.06372479796409607
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.155348801612854
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.04535999894142151
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.030873599648475646
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.4059408187866211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.2163327932357788
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.035036799311637876
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.0719871997833252
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.028892800211906433
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.12142560482025147
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.03703039884567261
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.04939199984073639
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.48888797760009767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.14978400468826295
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.09014400243759155
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.04938879907131195
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.06293920278549195
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.265064001083374
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.6330448150634765
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.03918879926204681
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.11401599645614624
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.18718719482421875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.07543200254440308
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.05554080009460449
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.336407995223999
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.7959487915039063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.4250783920288086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.04530400037765503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.24021759033203124
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.0978223979473114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.1439743995666504
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.069896000623703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.057555198669433594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.20259358882904052
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3409535884857178
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.1629119873046876
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07447999715805054
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.6144415855407714
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.09528639912605286
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.459716796875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.13154879808425904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.772438383102417
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.17025599479675294
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.42847042083740233
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.12309919595718384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.0985376000404358
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.2568048000335693
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.4118607997894287
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.26314239501953124
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.541753578186035
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.18487679958343506
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.14543360471725464
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.71287522315979
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.3182496070861816
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,3.07326717376709
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.2337023973464966
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.6100208282470703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.3264591932296753
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.8764335632324218
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.18545440435409546
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.508678388595581
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,1,1,0,0.34669759273529055
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,1,4,0,0.09845920205116272
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,1,16,0,0.03504000008106232
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,1,32,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,1,8,0,0.05650720000267029
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,1,64,0,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,1,2,0,0.18170880079269408
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,1,1,0,0.346343994140625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,1,2,0,0.18431040048599243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,1,4,0,0.10440479516983033
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,1,8,0,0.06253119707107543
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,1,16,0,0.04245119988918304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,1,64,0,0.024868799746036528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.07312319874763488
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.23197600841522217
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.44206881523132324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.04530879855155945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,1,32,0,0.031001600623130798
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.031860798597335815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.02481600046157837
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.1261199951171875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.4719088077545166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.25277440547943114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.08432480096817016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.04122079908847809
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.1408511996269226
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.03496159911155701
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.05552800297737122
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.16128480434417725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.2923824071884155
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.09259039759635926
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.5564127922058105
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.032839998602867126
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05812320113182068
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.04127039909362793
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.3288095951080322
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.1840831995010376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.1073248028755188
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.6169871807098388
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.053478401899337766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.0432096004486084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.06984159946441651
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.22869439125061036
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.4158783912658691
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7933119773864746
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.13248480558395387
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.059614402055740354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.04737119972705841
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.5022448062896728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.08418239951133728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.9483167648315429
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.10460000038146973
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.1642400026321411
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.07635520100593567
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.28033759593963625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.061684799194335935
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.6518256187438964
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.2431743621826172
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.3579456090927124
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09656000137329102
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.21104159355163574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.1343616008758545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07593439817428589
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.8218784332275391
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.5580495834350585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.17286720275878906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.2687455892562866
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.1265679955482483
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.45298399925231936
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.10006719827651978
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.20546236038208
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.6593552112579346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,2.3101648330688476
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.38526880741119385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.2454591989517212
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.1766047954559326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.13772799968719482
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.8826608657836914
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.5179056167602538
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.8285872459411621
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.31280319690704345
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.22560319900512696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.17854559421539307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.486348819732666
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1,1,0,0.016548800468444824
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1,8,0,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1,16,0,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1,4,0,0.014468799531459808
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1,32,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1,64,0,0.014115199446678162
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1,2,0,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1,1,0,0.02067359983921051
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1,8,0,0.020979200303554536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1,4,0,0.020803199708461763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1,32,0,0.022728000581264497
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1,64,0,0.018638400733470915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1,16,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1,2,0,0.02279520034790039
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.01672479957342148
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.014497600495815277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.014473600685596466
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.014486399292945863
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.02279680073261261
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.022697600722312927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.022697600722312927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.020803199708461763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.02072319984436035
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.022729599475860597
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.01653279960155487
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.014697599411010741
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.015044799447059632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.026719999313354493
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.020737600326538087
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.020735999941825865
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.020732800662517547
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.0227183997631073
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.018611200153827667
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.015112000703811645
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.022702400386333466
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.014662399888038635
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.026820799708366393
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.014472000300884247
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.016519999504089354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.022672000527381896
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.021457600593566894
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.022835199534893037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.018568000197410582
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.015304000675678253
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.033011201024055484
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.014628799259662628
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.024806399643421174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.02281759977340698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.022702400386333466
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.02690880000591278
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.024833600223064422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.030921599268913268
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.018697600066661834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.02077440023422241
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.04128639996051788
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.03502880036830902
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.018620799481868743
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.02688640058040619
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.026878398656845093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.02688960134983063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.030868801474571227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.03506399989128113
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.05650079846382141
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.024803200364112855
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.022699199616909027
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.0289247989654541
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.021622399985790252
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.045300799608230594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.03734880089759827
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.0717408001422882
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.03292160034179688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.028910401463508605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.11622240543365478
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.06831039786338806
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.02884320020675659
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.043182399868965146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.033000001311302186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.02928000092506409
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.035087999701499936
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.02911520004272461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.13760160207748412
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.03917120099067688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.0514735996723175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.08095679879188537
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.038196799159049985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.03512639999389648
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.18491840362548828
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.04332480132579804
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.06649439930915832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.03912639915943146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.046435201168060304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.041099199652671815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.10690399408340454
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.21010398864746094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.03701280057430267
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.07728639841079712
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.04938879907131195
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.05551360249519348
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.0452320009469986
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.12223999500274658
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.26469600200653076
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.1507439970970154
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.09077759981155395
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.057843202352523805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.047225600481033324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.05101760029792786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.04326240122318268
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.045347198843955994
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.2949664115905762
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.16861920356750487
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.10234240293502808
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.06564639806747437
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05753920078277588
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.05140799880027771
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.04941760003566742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.2541935920715332
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.14962719678878783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.0697376012802124
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.061705601215362546
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.0949616014957428
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.059622400999069215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.46904802322387695
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.2724735975265503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.10175520181655884
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.0759775996208191
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.1594607949256897
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.06864159703254699
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.06574079990386963
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.4910639762878418
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.7281904220581055
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.0804144024848938
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.1379968047142029
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.07392799854278564
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.3856623888015747
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.08711199760437012
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.7283599853515625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.08375200033187866
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.22767679691314696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.3966592073440552
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.220032000541687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.1418544054031372
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.09158719778060913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.07811520099639893
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.7364831924438476
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.4038607597351074
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.3985663890838623
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.10672800540924073
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.230401611328125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.23274400234222412
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.3187503814697266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.11231679916381836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.39277920722961424
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.7052224159240723
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.1511904001235962
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.1521056056022644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.11569119691848755
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.35827040672302246
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.6320735931396484
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,1.1826255798339844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.22278881072998047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.10883519649505616
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.14489920139312745
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,2.359772872924805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.13622080087661742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.1465664029121399
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.34470720291137696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.5961679935455322
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.2190095901489258
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,2.068244743347168
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.13692480325698853
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,1.0853343963623048
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,3.594091033935547
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.3070703983306885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.7535856246948243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.5068463802337646
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.19246560335159302
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.16881120204925537
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.9175807952880859
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.29637119770050047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,0.8373488426208496
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.1890544056892395
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.16444000005722045
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.47569918632507324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.5459808349609374
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,2.9938495635986326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.39913599491119384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.6950928211212158
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.25714879035949706
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.2620415687561035
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,2.413015937805176
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.19904799461364747
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.6287295818328857
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,1.1200528144836426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,5.008135986328125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.3757375955581665
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.19306880235671997
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,2.091139221191406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.24879040718078613
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,4.14368782043457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.6388847827911377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.26110239028930665
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,1.1247615814208984
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.39498400688171387
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,4.333707046508789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,2.124776077270508
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,0.9867312431335449
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.5765071868896484
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,1.7938255310058593
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,8.80361099243164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,3.4531360626220704
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.37308320999145506
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.2497551918029785
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,6.826576232910156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32768,32,0,1.19193115234375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32768,16,0,2.109614372253418
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32768,64,0,0.7279280185699463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32768,8,0,4.095851135253906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32768,4,0,8.240670776367187
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32768,8,0,3.22071533203125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32768,16,0,1.7455568313598633
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32768,32,0,1.0226927757263184
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32768,2,0,16.558735656738282
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32768,4,0,6.362870407104492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1,1,0,0.019219200313091277
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32768,64,0,0.6676208019256592
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1,2,0,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1,16,0,0.014430400729179383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32768,2,0,12.754937744140625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1,4,0,0.014665600657463074
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1,32,0,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1,64,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1,8,0,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1,1,0,0.02685759961605072
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1,2,0,0.02282239943742752
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1,8,0,0.02078399956226349
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1,16,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1,64,0,0.02062080055475235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1,32,0,0.02276480048894882
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1,4,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,32768,1,0,32.97803649902344
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.01974239945411682
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.015003199875354766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,32768,1,0,25.511825561523438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.02473440021276474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.022655999660491942
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.028265601396560668
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.02282879948616028
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.020750400424003602
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.02268799990415573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.017475199699401856
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.022759999334812164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.020692799985408784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.013920000195503235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.0147599995136261
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.028958401083946227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.02269279956817627
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.024799999594688416
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.022711999714374542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.022448000311851502
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.02274080067873001
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.02675360143184662
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.01863519996404648
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.035051199793815616
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.022808000445365906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.022732800245285033
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.021638399362564086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.02072319984436035
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018644799292087556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.033020800352096556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.014772799611091614
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.04521760046482086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.032995200157165526
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.026851201057434083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.024710400402545928
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.051046401262283325
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.022832000255584718
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.03252480030059814
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.02481279969215393
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.0268095999956131
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.022729599475860597
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.0657472014427185
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.04120799899101257
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.02895520031452179
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.028411200642585753
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.034948799014091494
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.026791998744010927
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.027008000016212463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.09839839935302734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.035062399506568906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.056948798894882205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.02882240116596222
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.024820800125598907
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.12203680276870728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.07053120136260986
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.022801600396633148
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.024702399969100952
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.04732159972190857
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.03715200126171112
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.033020800352096556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.030947199463844298
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.028918400406837463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.2059839963912964
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.11587200164794922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.043243199586868286
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.037268799543380735
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.07024480104446411
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.03092319965362549
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.24396159648895263
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.13759039640426635
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.08293439745903015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.05522400140762329
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03301759958267212
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.03914879858493805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.0458079993724823
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.33851840496063235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.04118399918079376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.18747999668121337
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.10897279977798462
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.04937599897384644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.043198400735855104
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.04116640090942383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.0698848009109497
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.2136080026626587
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.12538880109786987
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.38517439365386963
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.05759519934654236
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.05124800205230713
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.047460800409317015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.07902079820632935
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.15324800014495848
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.2669663906097412
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.05966079831123352
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.05347040295600891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.09395999908447265
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.4966127872467041
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.049379199743270874
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.17139999866485595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.2978384017944336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.10576159954071045
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.06870239973068237
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.5476751804351807
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.06091679930686951
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.9054287910461426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.258950400352478
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.47164478302001955
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.05556319952011109
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.15318399667739868
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.10016319751739503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.06781439781188965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.07321280241012573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.9279199600219726
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.49498400688171384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.10894399881362915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.08017280101776122
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.1647968053817749
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.2775935888290405
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.0739471971988678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.3926959991455078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.7353759765625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.140556800365448
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.2250080108642578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.4273839950561524
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.7332496166229248
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.09267839789390564
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.390608024597168
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.14887360334396363
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.40408639907836913
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.08586879968643188
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.09938880205154418
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.2342223882675171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.09241920113563537
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.4094992160797119
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.7389120101928711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.1590831995010376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.24008479118347167
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.4166080474853515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.757004737854004
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.7154352188110351
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.3296192169189454
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.40361762046813965
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.12146879434585571
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.5499151229858397
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.24262399673461915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.16233760118484497
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.12518240213394166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.2144255638122559
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.6474800109863281
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.23651518821716308
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,2.3281936645507812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.36821279525756834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.15726239681243898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,4.731320190429687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.6112480163574219
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,1.1015472412109375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.36014881134033205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.16036319732666016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.23747520446777343
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,2.0837919235229494
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,4.03888168334961
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.943876838684082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,1.7743135452270509
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.5216847896575928
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.3213344097137451
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,3.5517982482910155
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.20595040321350097
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,7.337104034423828
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,0.8574704170227051
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.49488158226013185
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.5651776313781738
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,3.0051151275634767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.20774240493774415
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,5.984856033325196
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.31405599117279054
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.2881775856018067
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.7096879959106446
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.474295997619629
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.4148416042327881
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,5.048124694824219
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.2735856056213379
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,2.11495361328125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.2698832035064697
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,1.1376239776611328
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,4.122286224365235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.6519487857818603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,10.191519927978515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.3979439973831177
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,8.328958129882812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,1.1421824455261231
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,2.1428735733032225
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.41954717636108396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,4.318975830078125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,0.6569632053375244
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,8.680687713623048
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,1.82236328125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,3.4719310760498048
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,1.0160672187805175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.6063888072967529
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,6.933888244628906
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.4022655963897705
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,17.79724884033203
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,14.00196533203125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32768,16,0,4.137481689453125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32768,32,0,2.153660774230957
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32768,8,0,8.360203552246094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32768,64,0,1.2396976470947265
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32768,4,0,16.435084533691406
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32768,16,0,3.2581905364990233
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32768,8,0,6.3481391906738285
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32768,64,0,1.0870688438415528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32768,4,0,12.645193481445313
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1,1,0,0.026966398954391478
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32768,32,0,1.7973440170288086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1,2,0,0.021985599398612977
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1,8,0,0.014870400726795196
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1,16,0,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1,32,0,0.014505599439144135
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1,64,0,0.014550399780273438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1,4,0,0.016510400176048278
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32768,2,0,33.33120422363281
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1,1,0,0.0329120010137558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1,2,0,0.024868799746036528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32768,2,0,25.652203369140626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1,8,0,0.02306720018386841
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1,32,0,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1,4,0,0.024694399535655977
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1,16,0,0.020694400370121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1,64,0,0.020664000511169435
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.020656000077724456
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.027739199995994567
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.014603200554847717
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.03504799902439117
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.024792000651359558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.028881600499153136
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.02073120027780533
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.03097440004348755
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.020721599459648132
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.022785599529743194
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.01666879951953888
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.014473600685596466
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.0144896000623703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.04118080139160156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.016459199786186218
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.029070401191711427
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.024833600223064422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.024878400564193725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.02272160053253174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.02276480048894882
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.03736160099506378
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,32768,1,0,51.56920166015625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.024817599356174468
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,32768,1,0,67.27825317382812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.01653600037097931
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.01465120017528534
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.035068801045417784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.026846399903297423
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.022787199914455415
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.024675199389457704
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.02266719937324524
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.049332800507545474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.05342559814453125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.024740800261497498
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.022705599665641785
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.032996800541877744
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.018667200207710268
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.016531200706958772
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.043191999197006226
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.03298560082912445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.02893120050430298
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.024828800559043886
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.06958400011062622
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.024745599925518037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.02330079972743988
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.05142880082130432
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.0901632010936737
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.025519999861717223
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.020721599459648132
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.03300639986991882
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.020636799931526183
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.04326080083847046
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.06577919721603394
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.11532479524612427
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.0350383996963501
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.028911998867988585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.027371200919151305
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.059622400999069215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.09961439967155457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.028892800211906433
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.037003201246261594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.17674560546875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.030956798791885377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.026851201057434083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.024830399453639983
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.12259680032730103
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.22029919624328614
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.07298719882965088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.0350735992193222
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.03293440043926239
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.039139199256896975
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.20768160820007325
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.3838848114013672
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.047331199049949646
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.04736000001430511
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.03912799954414368
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.0726144015789032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.03710399866104126
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.11913440227508545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.45300960540771484
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.24829440116882323
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.08675040006637573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.05750880241394043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.04937120079994202
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.04324159920215607
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.14195040464401246
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.3433903932571411
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.19121439456939698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.0730239987373352
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.11314400434494018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.6472303867340088
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.05317599773406982
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.04732959866523743
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.3894160032272339
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.21803679466247558
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.08428000211715699
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.1298815965652466
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.7294015884399414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.05554400086402893
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.969972801208496
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.5037824153900147
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.2735071897506714
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.06212959885597229
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.1603360056877136
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.057662397623062134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.0653328001499176
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.5520175933837891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.3040528059005737
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.09989439845085143
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.11210720539093018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.07609120011329651
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.17673439979553224
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,1.0457119941711426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.06780959963798523
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.26758561134338377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.9062000274658203
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.7857599258422852
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.16234400272369384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.47992639541625975
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.8041391372680664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.10898239612579345
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.5072256088256836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.937990379333496
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.08104159832000732
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.17475999593734742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.28894240856170655
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.11736799478530884
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.09239680171012879
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.7415296077728272
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.23501119613647461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.40430560111999514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.4189023971557617
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.15500320196151735
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.8455055236816404
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.10397759675979615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.24826400279998778
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.403936004638672
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.7498976230621338
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.41779041290283203
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.719406318664551
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.11468319892883301
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.16539520025253296
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.4238560199737549
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.7600224018096924
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.867193603515625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.4301039695739746
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.25908479690551756
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.1734655976295471
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,5.813740921020508
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.42511520385742185
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.18328640460968018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.26543359756469725
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.3517583847045898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.5787023544311523
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.7375823974609375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,5.125096130371094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.237665557861328
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.25899519920349123
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,2.3512527465820314
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.6706719875335694
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.39271039962768556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,4.887992095947266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.6394576072692871
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.3912400007247925
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,2.111302375793457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,1.1295632362365722
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,4.115502548217774
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,9.685700988769531
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.2649519920349121
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.816481590270996
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.34886720180511477
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,8.258920288085937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,3.5649951934814452
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,0.9774640083312989
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.5523551940917969
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,7.34668197631836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.5339600086212158
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,1.6047391891479492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,0.8944656372070312
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,6.0043598175048825
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.3508016109466553
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,14.905668640136719
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,3.0437984466552734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.3093008041381835
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,12.1650146484375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,5.0987598419189455
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.45134878158569336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.7437424182891845
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.4894847869873047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,10.232418823242188
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.6932112216949463
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,1.1836480140686034
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,2.1574560165405274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,4.185929489135742
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.44255681037902833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,8.346180725097657
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,20.193898010253907
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,2.194915199279785
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,16.784744262695312
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,4.354275131225586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,8.867040252685547
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.2012031555175782
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,0.7077392101287842
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,1.0749823570251464
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,1.8762992858886718
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,17.902096557617188
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.6669439792633056
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,3.514400100708008
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1,1,0,0.036320000886917114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,7.0656593322753904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1,4,0,0.02462719976902008
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1,16,0,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1,8,0,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1,2,0,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1,64,0,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1,32,0,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1,1,0,0.04148800075054169
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1,2,0,0.031115201115608216
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,14.07805938720703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1,4,0,0.024833600223064422
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1,32,0,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1,8,0,0.022710399329662324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1,16,0,0.022681599855422972
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1,64,0,0.02247840017080307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,35.45677795410156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.026851201057434083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.020739200711250304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.014627200365066529
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.04060640037059784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.014486399292945863
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,28.495318603515624
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.0350383996963501
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.0494592010974884
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.028887999057769776
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.022777600586414336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.014609600603580474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.04746400117874146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.03214080035686493
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.01725279986858368
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.020768000185489653
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.016516800224781036
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.057529598474502563
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.01465280055999756
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.03915199935436249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.022678400576114654
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.028932800889015196
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.02476319968700409
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.024745599925518037
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.022777600586414336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.022788800299167633
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.01857919991016388
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.0378383994102478
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.06175360083580017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.026627200841903686
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.07603039741516113
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.04965919852256775
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.03704639971256256
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.02685439884662628
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.02281759977340698
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.02279680073261261
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.03293919861316681
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.05345919728279114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.02351839989423752
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.018680000305175783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.018564799427986146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.0927295982837677
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.06991519927978515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.04530879855155945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.03290559947490692
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.11773439645767211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.028832000494003297
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.024817599356174468
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.160207998752594
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.09128959774971009
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.051846402883529666
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.02480800002813339
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.03303839862346649
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.02272319942712784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.20387680530548097
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.1153872013092041
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.06782559752464294
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.035068801045417784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.032948800921440126
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.030883198976516722
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.04323360025882721
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.17905919551849364
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.1024832010269165
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.3285984039306641
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.06308159828186036
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.03927200138568878
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.034796801209449765
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.028937599062919615
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.22232480049133302
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.12728320360183715
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.40590558052062986
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.05142719745635986
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.0762943983078003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.04332799911499023
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.03712800145149231
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.7445968151092529
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.21388161182403564
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.3901087999343872
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.07727360129356384
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.12588640451431274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.045289599895477296
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.05152480006217956
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.8690560340881348
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.25385921001434325
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.14629600048065186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.45963358879089355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.09476159811019898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.06376479864120484
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.05553600192070007
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.3503551959991455
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,0.6617919921875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.19898560047149658
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.2768351554870605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.12233279943466187
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.08064320087432861
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.06166399717330932
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.4155152320861817
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.7379295825958252
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.4012479782104492
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.2297071933746338
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.09444320201873779
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.13975679874420166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.07190399765968322
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.28436319828033446
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.5177152156829834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.170798397064209
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.9769472122192383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.9154640197753907
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07711039781570435
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.11073600053787232
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.31780319213867186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.5673247814178467
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,1.0593631744384766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,2.0449296951293947
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.19299520254135133
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.09034079909324647
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.1296048045158386
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.9334704399108886
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.12416319847106934
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.497273588180542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.7828208923339843
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.2869488000869751
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.17852480411529542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,3.621059036254883
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.9605839729309082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,3.5411121368408205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.1970128059387207
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.8278032302856446
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.3100303888320923
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.5275392055511474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.14033440351486207
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.7687632083892822
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.25886399745941163
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.815608024597168
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.4435279846191407
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.4299007892608643
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.1774127960205078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.4345408439636231
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,5.901219177246094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.4467520236968994
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.7482864379882814
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.7761040210723877
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.19443199634552003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,5.372052764892578
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.27921280860900877
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.7909552097320557
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.4750288009643555
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.8056495666503904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.4600080013275146
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.29192800521850587
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,5.8999889373779295
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.7782048225402832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.6217119216918947
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.3939151763916016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,5.100716781616211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,11.889118194580078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.4706240177154541
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,10.277677154541015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.7098207950592041
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,2.3835935592651367
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,4.771979141235351
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.2615743637084962
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.30840320587158204
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.4429840087890625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,10.003713226318359
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,2.1694351196289063
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,1.1891152381896972
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.6985487937927246
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1,1,0,0.05616160035133362
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,4.1249439239501955
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,8.273973083496093
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.4478608131408691
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1,2,0,0.03338080048561096
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1,4,0,0.02484000027179718
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1,8,0,0.0213919997215271
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,20.250804138183593
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1,64,0,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1,2,0,0.04113439917564392
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,16.477445983886717
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1,16,0,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1,4,0,0.03296320140361786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1,32,0,0.015095999836921692
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1,16,0,0.02274399995803833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1,1,0,0.06183040142059326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1,32,0,0.02064799964427948
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.04119040071964264
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1,8,0,0.026830399036407472
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.06799200177192688
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.026926401257514953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.020619200170040132
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1,64,0,0.022675199806690215
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.014467200636863709
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.016543999314308167
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.05069440007209778
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.026868799328804018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.07813439965248108
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.035046398639678955
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.022716799378395082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.024732799828052522
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.0473471999168396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.016494399309158324
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.08062080144882203
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.03089439868927002
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.014609600603580474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05733759999275208
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.04119200110435486
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.030865600705146788
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.024796800315380098
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.09402080178260804
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.11286720037460327
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.020423999428749083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.03915199935436249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.06179519891738892
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.02678079903125763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.13800159692764283
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.07639200091362
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.050811201333999634
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.03678559958934784
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.02486239969730377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.028259199857711793
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.017022399604320525
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.1684831976890564
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.053491199016571046
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.024868799746036528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.09454240202903748
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.21547679901123046
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.12043199539184571
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.06994240283966065
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.03452480137348175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.04715520143508911
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.035025599598884585
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.02889919877052307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.16409120559692383
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.09471359848976135
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.3001391887664795
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.03511840105056763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.030856001377105712
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.026867198944091796
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.026767998933792114
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.05395359992980957
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.37915840148925783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.2070591926574707
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.11913919448852539
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.0713536024093628
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.04906879961490631
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.03915359973907471
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.035041600465774536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.33484640121459963
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.1848304033279419
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.06776800155639648
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.045296001434326175
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.10841280221939087
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.6366543769836426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.039134401082992556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.7769296169281006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.22828478813171388
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.4116799831390381
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.057576000690460205
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.05120800137519836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.13383040428161622
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.22585439682006836
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.13603039979934692
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.7591184139251709
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.4012256145477295
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.08906880021095276
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.08226079940795898
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.4808048248291015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.4748239994049072
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.26865599155426023
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.8820240020751953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.1614527940750122
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.06393120288848878
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.7010623931884765
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.36752800941467284
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.10988479852676392
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.6738560199737549
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.2900287628173828
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.13793439865112306
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.2160799980163574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.07909920215606689
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.5634559631347655
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.09714080095291137
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.42209601402282715
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.25107200145721437
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.4366448402404786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.7597824096679687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.16206879615783693
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.11786559820175171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.7880224227905273
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,1.0099231719970703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.5356256008148194
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.909328079223633
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.3070528030395508
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.19322880506515502
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.930735969543457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.13613439798355104
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,1.0902544021606446
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.3478656053543091
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,2.0716831207275392
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.22175199985504152
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.5953167915344239
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,4.039083099365234
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.5315216064453125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.15637439489364624
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.8323776245117187
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.9628335952758789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.31854560375213625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,3.660617446899414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.21353280544281006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,7.389902496337891
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.5686880111694336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,1.0031200408935548
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.8679008483886719
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.3547231912612915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.242142391204834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,3.590572738647461
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,7.008383941650391
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.47385921478271487
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,2.9065776824951173
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.8070272445678711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.5018959999084474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.30613279342651367
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,5.845419311523438
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.4938112258911134
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.8338512420654297
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.5051951885223389
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.336571192741394
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,2.80263671875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,5.437704086303711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,11.872740936279296
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1,2,0,0.056139200925827026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1,1,0,0.0971343994140625
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1,8,0,0.0247871994972229
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1,4,0,0.03440159857273102
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,10.701700592041016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1,32,0,0.016551999747753142
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1,1,0,0.10264159440994262
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1,4,0,0.04126560091972351
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1,16,0,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1,2,0,0.06333280205726624
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1,8,0,0.030904000997543334
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1,64,0,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1,32,0,0.02279040068387985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1,64,0,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.12130080461502075
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.04118239879608154
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1,16,0,0.02480800002813339
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.06842560172080994
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.028751999139785767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.015404799580574035
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.13324639797210694
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.02067520022392273
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.07823200225830078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.0496832013130188
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.024545599520206452
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.028859201073646545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.03504799902439117
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.15255680084228515
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.04735519886016846
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.08250240087509156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.022716799378395082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.17152479887008668
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.03145920038223267
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.0950160026550293
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.01652639955282211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.05755839943885803
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.04121760129928589
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.20959839820861817
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.0248416006565094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.11418240070343018
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.04117920100688934
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.06377599835395813
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.02887519896030426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.01857759952545166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.2542880058288574
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.020632000267505647
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.05142239928245544
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.1395632028579712
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.07907840013504028
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.03818239867687225
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.02885279953479767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.0268640011548996
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.31976640224456787
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.09668639898300171
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.1722591996192932
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.057004797458648684
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.03709760010242462
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.02709600031375885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.02471359968185425
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.40487518310546877
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.07398880124092103
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.21938080787658693
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.049511998891830444
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.03709119856357575
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.122051203250885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.03294720053672791
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.581932783126831
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.3070895910263062
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.1706112027168274
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.06138560175895691
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.09864159822463989
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.035099199414253233
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.7307024002075195
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.3857935905456543
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.07827200293540955
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.12583359479904174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.055516797304153445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.21334400177001953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.04731999933719635
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.2535792350769044
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.6500351905822754
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.19546879529953004
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.08024160265922546
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.34691998958587644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.11945439577102661
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.055516797304153445
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.42701120376586915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.5219856262207032
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.7930335998535156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.24348959922790528
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.14804480075836182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.07193920016288757
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.10025919675827026
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.9461999893188477
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.11283999681472778
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.15904799699783326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.5062975883483887
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.4243216037750244
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.7795695781707763
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.24823200702667236
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,3.3622623443603517
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.5016064167022705
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.9129631996154786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.19090399742126465
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.29696800708770754
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.13855359554290772
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.728036880493164
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.40141119956970217
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.3283472061157227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.24983839988708495
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.17251360416412354
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.7080927848815918
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.597782325744629
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.4625999927520752
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.8048831939697265
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.479092788696289
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,5.179660797119141
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.830295944213867
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.293336009979248
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.20442399978637696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,5.507718276977539
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.3523983955383301
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.23947520256042482
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.9885120391845703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.5825727939605713
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,3.9624671936035156
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,1.044001579284668
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,7.882595062255859
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.6514336109161377
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,1.1438079833984376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,2.132815933227539
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.2788975954055786
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,4.096748733520508
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.40491838455200196
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1,2,0,0.09713119864463807
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1,4,0,0.056550401449203494
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1,16,0,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,7.977848052978516
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1,8,0,0.034857600927352905
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1,64,0,0.01624159961938858
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1,1,0,0.17958879470825195
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1,32,0,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1,1,0,0.1829568028450012
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1,2,0,0.1030608057975769
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1,32,0,0.026183998584747313
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1,4,0,0.062403202056884766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1,16,0,0.031041601300239564
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1,64,0,0.024694399535655977
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.1224784016609192
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1,8,0,0.0411871999502182
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.22912800312042236
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.06821600198745728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.04121600091457367
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.020630399882793426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.028806400299072266
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.07804480195045471
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.01658560037612915
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.1338703989982605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.051283198595047
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.03502880036830902
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.028838399052619933
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.2496943950653076
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.024827200174331664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.03499200046062469
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.0497296005487442
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.08237919807434083
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.1527791976928711
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.024719999730587007
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.3196016073226929
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.2856607913970947
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.17243679761886596
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.09662399888038635
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.059671998023986816
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.030980798602104186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.0412992000579834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.026859200000762938
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.11716639995574951
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.06585599780082703
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.043593600392341614
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.030895999073982237
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.21143040657043458
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.40075039863586426
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.48266239166259767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.2582079887390137
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.14361920356750488
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.05551519989967346
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.08410400152206421
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.03333280086517334
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.041223999857902524
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.1763200044631958
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.062027198076248166
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.6186992168426514
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.10139039754867554
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.04320000112056732
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.03293919861316681
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.32535200119018554
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.7792319774627685
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.41196160316467284
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.057076799869537356
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.08211680054664612
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.227622389793396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.045184001326560974
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.12938719987869263
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.1812559962272644
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.07310559749603271
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.31787359714508057
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5924928188323975
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.0529151976108551
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,1.1398880004882812
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.11115360260009766
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.40183520317077637
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.7463967800140381
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.22779200077056885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.426638412475586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.14162240028381348
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.09400320053100586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.06989759802818299
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.36977760791778563
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6696335792541503
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.14004640579223632
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.10262399911880493
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.2728367805480958
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.21786079406738282
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.503112030029297
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.4539023876190186
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.8211503982543945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.550939178466797
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.17533600330352783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.1291983962059021
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,3.011262321472168
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.27099359035491943
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.4691743850708008
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.5420111656188964
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.2925136089324951
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.8263615608215332
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,2.9837472915649412
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.20521759986877441
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,6.004889678955078
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.9655391693115234
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.5577824115753174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.7868879318237305
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.24795839786529542
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.35211360454559326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,3.420571136474609
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1,4,0,0.09761120080947876
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1,2,0,0.18060319423675536
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1,16,0,0.03440800011157989
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,6.668102264404297
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1,32,0,0.0248416006565094
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1,8,0,0.05679200291633606
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,1,1,0,0.34645440578460696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1,64,0,0.01860480010509491
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,1,1,0,0.3452928066253662
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1,2,0,0.1828271985054016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1,8,0,0.06192319989204407
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1,16,0,0.041140800714492796
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1,64,0,0.024873599410057068
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.23031361103057862
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.438753604888916
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1,32,0,0.03296639919281006
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.1238368034362793
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1,4,0,0.10351040363311767
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.07098879814147949
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.04310399889945984
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.03094240128993988
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.022724799811840057
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.4683328151702881
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.13642560243606566
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.2503727912902832
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.05255680084228516
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.03914879858493805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.0796992003917694
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.5530272006988526
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.28747360706329345
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.15537919998168945
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.05161600112915039
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.03703039884567261
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.08660159707069397
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.6111392021179199
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.17618720531463622
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.32097759246826174
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09998559951782227
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.0637328028678894
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.04527359902858734
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.40566720962524416
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.21665918827056885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.7838352203369141
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.03500320017337799
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.12174240350723267
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.07203840017318726
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.036723199486732486
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.04938240051269531
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.4884047985076904
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.9344752311706543
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.08880159854888917
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.26362080574035646
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.061799997091293336
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.04743840098381043
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.1495296001434326
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.2204895973205567
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.1128607988357544
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.6311232089996338
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.05505759716033935
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.3368832111358643
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.07612640261650086
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.792852783203125
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.5321231842041017
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.426580810546875
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.1452288031578064
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.09651679992675781
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.07141119837760926
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.23972480297088622
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.18891040086746216
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.20325279235839844
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.614631986618042
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.34053280353546145
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09449440240859985
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.1646127700805664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,2.258665657043457
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.13190239667892456
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.428604793548584
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.25477120876312254
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.7745520114898682
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.4603504180908202
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.1702304005622864
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.12154239416122437
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.8265424728393556
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.7132944107055664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.319934368133545
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.41195039749145507
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.5351600646972656
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.2621392011642456
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.18481760025024413
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,4.964388656616211
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.8748784065246582
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.3277343988418579
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.6073343276977539
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.23403360843658447
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,3.0688783645629885
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.5111487865447998
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,1,2,0,0.3471184015274048
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,1,4,0,0.18100160360336304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,1,8,0,0.09957119822502136
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,1,16,0,0.0565887987613678
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,5.987680053710937
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,1,64,0,0.02473759949207306
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,1,32,0,0.035097599029541016
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,1,1,0,0.6802288055419922
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,1,1,0,0.6695328235626221
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,1,4,0,0.18385440111160278
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,1,2,0,0.3463727951049805
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,1,8,0,0.10306400060653687
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,1,16,0,0.06260160207748414
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,1,32,0,0.042556801438331605
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.8605504035949707
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,1,64,0,0.0329008013010025
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.43953118324279783
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.12718080282211303
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.23375999927520752
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.0723471999168396
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.04527679979801178
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.033024001121521
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.2532527923583984
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.13957279920578003
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.0840831995010376
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.05530400276184082
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.04119040071964264
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.4720304012298584
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.9094592094421386
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,1.0903023719787597
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.5567887783050537
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.29405760765075684
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.15947040319442748
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.09264320135116577
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.05769439935684204
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.04183200001716614
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.32768640518188474
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.1085312008857727
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.18454079627990722
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.05321440100669861
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.07068639993667603
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.1921072006225586
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.6159808158874511
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.13382719755172728
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.7917327880859375
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.08282399773597718
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.22780001163482666
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.416812801361084
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.5460864067077638
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.05960000157356262
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.9492719650268555
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.5025983810424804
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.2795072078704834
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.1630511999130249
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.10661120414733886
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.8427263259887696
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.07613440155982971
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.6507696151733399
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.35847361087799073
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.13571679592132568
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.242843246459961
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.4188255310058593
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.21117279529571534
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.09670720100402833
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.4545775890350342
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.26834878921508787
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.8231984138488769
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.5569343566894531
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,3.0322736740112304
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.1728000044822693
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.6603968143463135
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.206275177001953
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.12634559869766235
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.384008002281189
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.3092863082885744
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.1777343988418579
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.24618239402770997
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,4.51012954711914
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.8276703834533692
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.3118880033493042
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.22541439533233643
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.885580825805664
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.5145359992980958
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,5.628531265258789
SGLang,0.5.9,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.48532638549804685
