framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1,0.010542400181293488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,3,0.011337599903345107
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,3,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,3,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,3,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1,0.01024319976568222
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,3,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1,0.009937600046396256
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,3,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1,0.009836799651384353
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,3,0.010363200306892395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1,0.008504000306129456
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,3,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,3,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,3,0.010342399775981902
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,3,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,3,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,7,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,3,0.009404800087213516
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,7,0.011006399989128113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,7,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,7,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,3,0.008395200222730636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,7,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,7,0.010764800012111664
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,7,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,7,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,7,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,7,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,7,0.00891520008444786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,7,0.009763199836015701
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,7,0.008654399961233138
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,7,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,15,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,15,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,15,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,15,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,15,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,15,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,15,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,15,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,15,0.008753599971532822
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,15,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,15,0.008428800106048583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,15,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,15,0.009806399792432785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,15,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,31,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,31,0.010708799958229065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,31,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,31,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,31,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,31,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,31,0.008551999926567078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,31,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,31,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,31,0.008476799726486206
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,31,0.008555199950933456
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,31,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,63,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,63,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,63,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,63,0.01032480001449585
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,63,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,63,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,63,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,63,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,63,0.009441599994897843
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,63,0.009228800237178803
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,63,0.008558399975299835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,63,0.008475200086832047
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,63,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,127,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,127,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,127,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,127,0.009384000301361084
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,127,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,63,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,127,0.0094991996884346
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,127,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,127,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,127,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,127,0.008452799916267396
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,127,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,127,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,127,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,255,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,255,0.01053600013256073
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,255,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,255,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,255,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,127,0.010305599868297577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,255,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,255,0.010307200253009796
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,255,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,255,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,255,0.008497600257396699
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,255,0.008563199639320373
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,255,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,255,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,255,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,511,0.012583999335765839
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,511,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,511,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,511,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,511,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,511,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,511,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,511,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,511,0.010760000348091126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,511,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,511,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,511,0.011683200299739838
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,511,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,511,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1023,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1023,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1023,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1023,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1023,0.014480000734329224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1023,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1023,0.01257600039243698
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1023,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1023,0.012731200456619263
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1023,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1023,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1023,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1023,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,2047,0.016551999747753142
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,2047,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,2047,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1023,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,2047,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,2047,0.015011200308799743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,2047,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,2047,0.01446560025215149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,2047,0.014470399916172027
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,2047,0.014500799775123595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,2047,0.012803199887275695
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,2047,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,2047,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,4095,0.017008000612258913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,2047,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,4095,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,4095,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,4095,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,2047,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,4095,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,4095,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,4095,0.01446560025215149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,4095,0.015483200550079346
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,4095,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,4095,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,4095,0.014628799259662628
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,4095,0.013120000064373017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,8191,0.024607999622821806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,4095,0.013835200667381286
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,8191,0.018731200695037843
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,8191,0.016641600430011748
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,4095,0.01257600039243698
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,8191,0.01632159948348999
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,8191,0.016548800468444824
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,8191,0.01510239988565445
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,8191,0.014924800395965577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,8191,0.016631999611854555
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,8191,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,8191,0.01658080071210861
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,8191,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,8191,0.014511999487876893
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,8191,0.014608000218868256
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,16383,0.02691679894924164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,8191,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,16383,0.018598400056362152
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,16383,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,16383,0.024864000082015992
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,16383,0.02061759978532791
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,16383,0.019596800208091736
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,16383,0.019472000002861024
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,16383,0.020776000618934632
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,16383,0.016921600699424742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,16383,0.01799200028181076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,16383,0.0177839994430542
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,16383,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,16383,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,16383,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,32767,0.04120000004768372
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,32767,0.026872000098228453
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,32767,0.026900801062583923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,32767,0.02680639922618866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,32767,0.030668801069259642
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,32767,0.027716800570487976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,32767,0.02698560059070587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,32767,0.02268960028886795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,32767,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,32767,0.023124800622463228
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,32767,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,32767,0.02757120132446289
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,65535,0.040633600950241086
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,65535,0.043244799971580504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,65535,0.034964799880981445
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,65535,0.03500480055809021
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,32767,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,32767,0.022784000635147093
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,65535,0.034030398726463316
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,65535,0.033790400624275206
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,65535,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,65535,0.03713279962539673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,65535,0.028815999627113342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,65535,0.03176159858703613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,65535,0.027243199944496154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,65535,0.027822399139404298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,65535,0.02847839891910553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,131071,0.07116640210151673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,65535,0.026929599046707154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,131071,0.059843200445175174
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,131071,0.05312640070915222
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,131071,0.053059202432632444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,131071,0.05504639744758606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,131071,0.03950240015983582
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,131071,0.05546560287475586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,131071,0.033076798915863036
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,131071,0.054769599437713624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,131071,0.033020800352096556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,131071,0.04118880033493042
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,131071,0.03304159939289093
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,131071,0.03704319894313812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,131071,0.0329120010137558
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1,0.010281600058078766
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,3,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,3,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1,0.00841279998421669
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,3,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,3,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,3,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,3,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,3,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,3,0.008995199948549271
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,3,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,3,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,7,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,3,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,3,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,3,0.008423999696969987
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,7,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,7,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,7,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,7,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,7,0.010768000036478043
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,7,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,7,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,7,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,7,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,7,0.008441600203514098
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,7,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,7,0.010364799946546554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,15,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,15,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,15,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,15,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,7,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,15,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,15,0.010620799660682679
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,15,0.010384000092744827
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,15,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,15,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,15,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,15,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,31,0.010604800283908844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,31,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,31,0.010558400303125381
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,31,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,15,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,15,0.009387200325727462
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,31,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,31,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,15,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,31,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,31,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,31,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,31,0.010311999917030334
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,31,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,31,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,63,0.010556799918413162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,63,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,31,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,31,0.00921280011534691
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,63,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,63,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,63,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,63,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,63,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,63,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,63,0.00843840017914772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,63,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,63,0.008425600081682205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,63,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,63,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,63,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,127,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,127,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,127,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,127,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,127,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,127,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,127,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,127,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,127,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,127,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,127,0.008486399799585343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,127,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,127,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,127,0.010126399993896484
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,255,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,255,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,255,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,255,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,255,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,255,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,255,0.010356800258159637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,255,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,255,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,255,0.009385599941015243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,255,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,255,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,511,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,255,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,255,0.009350399672985076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,511,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,511,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,511,0.012438400089740754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,511,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,511,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,511,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,511,0.01061279997229576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,511,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,511,0.011521600186824799
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,511,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,511,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1023,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,511,0.0110895998775959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1023,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1023,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1023,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1023,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,511,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1023,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1023,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1023,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1023,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1023,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1023,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1023,0.01157120019197464
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1023,0.01234079971909523
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,2047,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,2047,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,2047,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,2047,0.012620800733566284
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,2047,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,2047,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,2047,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1023,0.011286400258541107
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,2047,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,2047,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,2047,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,2047,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,2047,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,2047,0.011392000317573547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,4095,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,2047,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,4095,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,4095,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,4095,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,4095,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,4095,0.017319999635219574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,4095,0.016519999504089354
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,4095,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,4095,0.018620799481868743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,4095,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,4095,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,4095,0.014851200580596923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,4095,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,8191,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,8191,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,4095,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,8191,0.01754560023546219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,8191,0.016654400527477263
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,8191,0.017089599370956422
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,8191,0.01887200027704239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,8191,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,8191,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,8191,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,8191,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,16383,0.030979201197624207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,8191,0.01660960018634796
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,8191,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,8191,0.014604799449443817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,16383,0.02138399928808212
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,16383,0.020744000375270844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,16383,0.022991999983787537
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,16383,0.020606400072574617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,16383,0.020640000700950623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,16383,0.02064799964427948
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,8191,0.014788800477981567
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,16383,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,16383,0.018783999979496
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,16383,0.01775680035352707
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,16383,0.01865919977426529
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,16383,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,16383,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,32767,0.03653120100498199
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,16383,0.01751520037651062
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,32767,0.027534401416778563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,32767,0.026804798841476442
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,32767,0.037601599097251893
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,32767,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,32767,0.030953601002693176
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,32767,0.026492801308631898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,32767,0.02282560020685196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,32767,0.026572799682617186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,32767,0.023209600150585173
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,32767,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,32767,0.022761599719524385
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,65535,0.05750880241394043
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,65535,0.05564640164375305
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,32767,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,65535,0.05148320198059082
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,65535,0.05278239846229553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,32767,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,65535,0.05498719811439514
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,65535,0.053904002904891966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,65535,0.03505280017852783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,65535,0.030763199925422667
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,65535,0.05330399870872497
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,65535,0.03028160035610199
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,65535,0.028944000601768494
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,65535,0.0335072010755539
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,65535,0.030752000212669373
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,131071,0.084334397315979
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,131071,0.0919376015663147
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,65535,0.030137598514556885
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,131071,0.0778656005859375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,131071,0.08012160062789916
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,131071,0.07971199750900268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,131071,0.051560002565383914
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,131071,0.05334879755973816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,131071,0.07724800109863281
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,131071,0.04828799962997436
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,131071,0.04628320038318634
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,131071,0.07796159982681275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,131071,0.04600160121917725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,131071,0.04707199931144714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1,0.010814400017261505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,131071,0.0474128007888794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,3,0.010715200006961823
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,3,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,3,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,3,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,3,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,3,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,3,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,3,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,3,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,3,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,3,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,3,0.009110400080680847
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,3,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,7,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,7,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,7,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,7,0.010592000186443329
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,7,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,7,0.010878399759531022
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,7,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,7,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,7,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,7,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,7,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,7,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,15,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,7,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,7,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,15,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,15,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,15,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,15,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,15,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,15,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,15,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,15,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,15,0.010275200009346008
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,15,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,15,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,15,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,31,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,31,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,31,0.010639999806880952
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,15,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,31,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,31,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,31,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,31,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,31,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,31,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,31,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,31,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,31,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,31,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,31,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,63,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,63,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,63,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,63,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,63,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,63,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,63,0.010550399869680404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,63,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,63,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,63,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,63,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,63,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,63,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,127,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,127,0.01167839989066124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,127,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,63,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,127,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,127,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,127,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,127,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,127,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,127,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,127,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,127,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,127,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,127,0.010107199847698211
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,255,0.011694400012493134
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,255,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,255,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,127,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,255,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,255,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,255,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,255,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,255,0.011097600311040878
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,255,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,255,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,255,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,255,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,255,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,511,0.013052800297737121
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,255,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,511,0.012803199887275695
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,511,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,511,0.012580800056457519
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,511,0.01239679977297783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,511,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,511,0.013120000064373017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,511,0.011687999963760376
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,511,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,511,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,511,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,511,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,511,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,511,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1023,0.01465280055999756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1023,0.014451199769973755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1023,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1023,0.015563200414180755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1023,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1023,0.013443200290203095
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1023,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1023,0.0137472003698349
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1023,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1023,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1023,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,2047,0.017022399604320525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,2047,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1023,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1023,0.012436799705028534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,2047,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1023,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,2047,0.014273600280284881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,2047,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,2047,0.012595200538635254
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,2047,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,2047,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,2047,0.011667200177907944
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,2047,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,2047,0.012415999919176102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,2047,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,2047,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,4095,0.020633600652217865
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,2047,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,4095,0.016752000153064727
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,4095,0.018636800348758698
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,4095,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,4095,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,4095,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,4095,0.016012799739837647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,4095,0.016518400609493257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,4095,0.014851200580596923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,4095,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,4095,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,4095,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,4095,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,8191,0.021371200680732727
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,8191,0.02001439929008484
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,4095,0.014476799964904785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,8191,0.03094879984855652
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,8191,0.01801439970731735
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,8191,0.018848000466823576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,8191,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,8191,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,8191,0.01852799952030182
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,8191,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,8191,0.016673600673675536
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,8191,0.015022400021553039
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,8191,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,16383,0.03136799931526184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,16383,0.03326399922370911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,8191,0.016388800740242005
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,16383,0.022793599963188173
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,8191,0.014664000272750855
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,16383,0.027599999308586122
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,16383,0.021780799329280853
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,16383,0.023291200399398804
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,16383,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,16383,0.023095999658107758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,16383,0.023372800648212434
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,16383,0.018824000656604768
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,16383,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,16383,0.018603199720382692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,16383,0.0187376007437706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,32767,0.05167520046234131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,32767,0.04748960137367249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,16383,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,32767,0.04779039919376373
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,32767,0.04798240065574646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,32767,0.057580798864364624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,32767,0.04705280065536499
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,32767,0.025377601385116577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,32767,0.024857600033283234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,32767,0.033127999305725096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,32767,0.047207999229431155
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,32767,0.026051199436187743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,32767,0.031167998909950256
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,32767,0.02476000040769577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,32767,0.024777600169181825
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,65535,0.07949280142784118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,65535,0.09642080068588257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,65535,0.07190719842910767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,65535,0.072433602809906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,65535,0.07236800193786622
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,65535,0.05287520289421081
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,65535,0.04288159906864166
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,65535,0.07222880125045776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,65535,0.04129599928855896
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,65535,0.04774720072746277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,65535,0.042100799083709714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,65535,0.0417248010635376
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,65535,0.04173440039157868
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,65535,0.07385119795799255
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,131071,0.13171520233154296
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,131071,0.16295679807662963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,131071,0.1251296043395996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,131071,0.12495520114898681
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,131071,0.12380160093307495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,131071,0.12490400075912475
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,131071,0.07173439860343933
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,131071,0.06412000060081482
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,131071,0.12374720573425294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,131071,0.06343839764595031
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,131071,0.08339840173721313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,131071,0.0642080008983612
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,131071,0.06352159976959229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,131071,0.06343039870262146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1,0.012798400223255157
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1,0.01075040027499199
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1,0.01210559979081154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1,0.009403199702501298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,3,0.010679999738931656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,3,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,3,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,3,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,3,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,3,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,3,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,3,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,3,0.010414399951696397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,3,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,3,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,3,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,3,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,7,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,7,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,3,0.009001599997282029
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,7,0.010623999685049058
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,7,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,7,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,7,0.01242239996790886
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,7,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,7,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,7,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,7,0.010694400221109391
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,7,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,7,0.010353600233793258
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,15,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,15,0.010679999738931656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,15,0.011159999668598175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,7,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,15,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,15,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,15,0.010539200156927109
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,7,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,15,0.010375999659299851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,15,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,15,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,15,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,15,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,15,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,15,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,31,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,31,0.010763200372457505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,31,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,31,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,15,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,31,0.011233600229024887
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,31,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,31,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,31,0.010582400113344192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,31,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,31,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,31,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,31,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,63,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,31,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,63,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,63,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,31,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,63,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,63,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,63,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,63,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,63,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,63,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,63,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,63,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,63,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,63,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,127,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,127,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,63,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,127,0.010876800119876861
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,127,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,127,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,127,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,127,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,127,0.01053759977221489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,127,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,127,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,127,0.010689599812030793
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,127,0.010360000282526016
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,127,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,255,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,255,0.011686400324106217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,255,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,127,0.009399999678134919
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,255,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,255,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,255,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,255,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,255,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,255,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,255,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,255,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,255,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,511,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,255,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,511,0.012595200538635254
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,255,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,511,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,511,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,511,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,511,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,511,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,511,0.012579199671745301
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,511,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,511,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,511,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,511,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,511,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,511,0.010897599905729295
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1023,0.016766400635242464
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1023,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1023,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1023,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1023,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1023,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1023,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1023,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1023,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1023,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1023,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1023,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1023,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,2047,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,2047,0.01865600049495697
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,2047,0.016577599942684172
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1023,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,2047,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,2047,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,2047,0.016543999314308167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,2047,0.016651199758052827
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,2047,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,2047,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,2047,0.013048000633716583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,2047,0.016019199788570405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,2047,0.013016000390052795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,2047,0.012639999389648438
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,2047,0.012838399410247803
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,4095,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,4095,0.01857600063085556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,4095,0.018681600689888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,4095,0.017136000096797943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,4095,0.02894560098648071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,4095,0.02067359983921051
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,4095,0.016974399983882903
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,4095,0.016652800142765045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,4095,0.015574400126934052
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,4095,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,4095,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,4095,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,4095,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,8191,0.03285439908504486
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,8191,0.022720000147819518
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,8191,0.02072319984436035
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,8191,0.020695999264717102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,8191,0.029499199986457825
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,8191,0.020751999318599702
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,8191,0.02484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,8191,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,4095,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,8191,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,8191,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,8191,0.02067520022392273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,8191,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,8191,0.016553600132465363
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,16383,0.05041120052337646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,8191,0.01666080057621002
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,16383,0.04545600116252899
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,16383,0.04580479860305786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,16383,0.030908799171447753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,16383,0.044968000054359435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,16383,0.05714719891548157
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,16383,0.045184001326560974
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,16383,0.02290080040693283
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,16383,0.022753599286079406
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,16383,0.03102880120277405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,16383,0.045337599515914914
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,16383,0.022801600396633148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,32767,0.08026880025863647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,32767,0.09446719884872437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,16383,0.022780799865722658
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,32767,0.0728991985321045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,32767,0.0715503990650177
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,16383,0.022686399519443512
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,32767,0.07264639735221863
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,32767,0.07281280159950257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,32767,0.07077760100364686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,32767,0.0469871997833252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,32767,0.04103040099143982
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,32767,0.040545600652694705
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,32767,0.03965120017528534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,32767,0.0514303982257843
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,65535,0.13187359571456908
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,65535,0.16503039598464966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,65535,0.12826080322265626
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,32767,0.040652799606323245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,32767,0.04020639955997467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,65535,0.1253056049346924
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,65535,0.12444800138473511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,65535,0.12459360361099243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,65535,0.08273280262947083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,65535,0.06982560157775879
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,65535,0.12733279466629027
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,65535,0.062116801738739014
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,65535,0.06559519767761231
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,65535,0.06294879913330079
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,131071,0.2351088047027588
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,131071,0.30500800609588624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,65535,0.0631663978099823
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,65535,0.061750400066375735
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,131071,0.22803199291229248
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,131071,0.2242959976196289
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,131071,0.22014238834381103
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,131071,0.22225439548492432
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,131071,0.11870399713516236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,131071,0.10506880283355713
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,131071,0.10720479488372803
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,131071,0.22089760303497313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,131071,0.14582079648971558
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,131071,0.10482079982757568
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,131071,0.10478240251541138
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,131071,0.10467040538787842
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1,0.010543999820947647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1,0.012412799894809723
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,3,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,3,0.012412799894809723
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,3,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,3,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,3,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,3,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,3,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,3,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,3,0.012438400089740754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,3,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,3,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,3,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,3,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,7,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,7,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,7,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,7,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,7,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,7,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,7,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,7,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,7,0.010577599704265594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,7,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,7,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,7,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,7,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,7,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,15,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,15,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,15,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,15,0.010782399773597717
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,15,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,15,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,15,0.01085119992494583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,15,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,15,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,15,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,15,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,15,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,15,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,31,0.012580800056457519
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,31,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,15,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,31,0.010700800269842149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,31,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,31,0.01144160032272339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,31,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,31,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,31,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,31,0.011081600189208984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,31,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,31,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,31,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,31,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,31,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,63,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,63,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,63,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,63,0.010435199737548828
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,63,0.011206399649381638
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,63,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,63,0.010523200035095215
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,63,0.011310400068759918
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,63,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,63,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,63,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,63,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,63,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,127,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,127,0.010900799930095673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,63,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,127,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,127,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,127,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,127,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,127,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,127,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,127,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,127,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,127,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,127,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,127,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,255,0.011881600320339202
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,127,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,255,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,255,0.013014400005340576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,255,0.010980799794197083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,255,0.012608000636100769
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,255,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,255,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,255,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,255,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,255,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,255,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,255,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,255,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,511,0.01653600037097931
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,511,0.013492800295352936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,511,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,255,0.010513599961996078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,511,0.013187199831008911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,511,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,511,0.012614400684833526
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,511,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,511,0.013575999438762665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,511,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,511,0.012228800356388092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,511,0.01239520013332367
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,511,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,511,0.010804799944162368
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1023,0.020635199546813966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,511,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1023,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1023,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1023,0.01611039936542511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1023,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1023,0.01656640022993088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1023,0.01566080003976822
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1023,0.014830400049686433
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1023,0.01257600039243698
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1023,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1023,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1023,0.012695999443531036
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,2047,0.028891199827194215
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,2047,0.020740799605846405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1023,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,2047,0.017820799350738527
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,2047,0.016697600483894348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,2047,0.018406400084495546
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1023,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,2047,0.02064799964427948
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,2047,0.01730239987373352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,2047,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,2047,0.016657599806785585
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,2047,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,2047,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,2047,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,2047,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,4095,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,4095,0.028913599252700806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,2047,0.014451199769973755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,4095,0.030865600705146788
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,4095,0.02075839936733246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,4095,0.020032000541687012
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,4095,0.018724800646305086
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,4095,0.02066880017518997
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,4095,0.022804799675941467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,4095,0.020657600462436677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,4095,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,4095,0.016564799845218657
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,4095,0.01517920047044754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,4095,0.016539199650287627
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,4095,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,8191,0.053780800104141234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,8191,0.04959039986133575
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,8191,0.04520959854125976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,8191,0.045281600952148435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,8191,0.04419200122356415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,8191,0.043731200695037845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,8191,0.0309935986995697
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,8191,0.04317759871482849
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,8191,0.02274080067873001
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,8191,0.022657600045204163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,8191,0.028825598955154418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,8191,0.022756800055503845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,8191,0.02078240066766739
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,16383,0.07603520154953003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,16383,0.0725488007068634
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,8191,0.020827199518680572
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,16383,0.09276800155639649
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,16383,0.06997920274734497
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,16383,0.0698144018650055
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,16383,0.0709775984287262
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,16383,0.07166079878807068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,16383,0.04538080096244812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,16383,0.04999360144138336
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,16383,0.03906719982624054
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,16383,0.0389631986618042
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,16383,0.040217599272727965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,16383,0.03907040059566498
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,32767,0.17010719776153566
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,32767,0.12795039415359497
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,16383,0.038913598656654357
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,32767,0.13178720474243164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,32767,0.12620799541473388
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,32767,0.08616799712181092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,32767,0.0682096004486084
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,32767,0.12565759420394898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,32767,0.1249776005744934
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,32767,0.06321920156478882
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,32767,0.12629760503768922
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,32767,0.060046398639678956
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,32767,0.060433602333068846
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,32767,0.06056640148162842
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,32767,0.05958240032196045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,65535,0.3290463924407959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,65535,0.23492159843444824
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,65535,0.22596800327301025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,65535,0.23424959182739258
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,65535,0.22432639598846435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,65535,0.22734239101409912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,65535,0.11711679697036743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,65535,0.2227952003479004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,65535,0.1562000036239624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,65535,0.10551199913024903
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,65535,0.10574400424957275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,65535,0.10389120578765869
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,65535,0.10484639406204224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,131071,0.4574575901031494
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,131071,0.42360639572143555
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,65535,0.10471839904785156
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,131071,0.42727999687194823
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,131071,0.42584800720214844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,131071,0.6424191951751709
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,131071,0.4272592067718506
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,131071,0.42817277908325196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,131071,0.22050879001617432
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,131071,0.29882400035858153
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,131071,0.18954720497131347
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,131071,0.1892688035964966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,131071,0.1896880030632019
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1,0.014459200203418732
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,131071,0.18852319717407226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,131071,0.18895039558410645
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1,0.01170559972524643
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1,0.012559999525547028
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1,0.010996799916028976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1,0.011153600364923476
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1,0.012417600303888322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,3,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,3,0.011939200013875962
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,3,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,3,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,3,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,3,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,3,0.012574400007724761
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,3,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,3,0.0110895998775959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,3,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,3,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,3,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,3,0.01080320030450821
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,7,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,3,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,7,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,7,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,7,0.012606400251388549
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,7,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,7,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,7,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,7,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,7,0.01141439974308014
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,7,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,7,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,7,0.010558400303125381
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,7,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,15,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,7,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,15,0.01281599998474121
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,15,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,15,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,15,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,15,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,15,0.01242239996790886
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,15,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,15,0.010883200168609618
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,15,0.01241919994354248
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,15,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,15,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,31,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,31,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,15,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,15,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,31,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,31,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,31,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,31,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,31,0.012444800138473511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,31,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,31,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,31,0.01199520006775856
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,31,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,31,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,63,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,31,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,31,0.011217600107192994
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,63,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,63,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,63,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,63,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,63,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,63,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,63,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,63,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,63,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,63,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,63,0.012435200065374375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,63,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,127,0.012595200538635254
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,63,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,127,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,127,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,127,0.014524799585342408
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,127,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,127,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,127,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,127,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,127,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,127,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,127,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,127,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,127,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,127,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,255,0.014496000111103058
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,255,0.013284799456596375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,255,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,255,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,255,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,255,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,255,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,255,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,255,0.011715199798345566
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,255,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,255,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,255,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,511,0.014619199931621552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,255,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,255,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,511,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,511,0.016835199296474458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,511,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,511,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,511,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,511,0.012664000689983367
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,511,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,511,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,511,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,511,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,511,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1023,0.024820800125598907
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,511,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1023,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1023,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1023,0.018641600012779237
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1023,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1023,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1023,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1023,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1023,0.016582399606704712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1023,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1023,0.01268640011548996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1023,0.020640000700950623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1023,0.012583999335765839
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,2047,0.031062400341033934
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1023,0.01446560025215149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,2047,0.02680160105228424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,2047,0.02041600048542023
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,2047,0.020735999941825865
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,2047,0.018695999681949616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,2047,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,2047,0.020742399990558623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,2047,0.01661120057106018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,2047,0.015372799336910247
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,2047,0.02062080055475235
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,2047,0.0207056000828743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,2047,0.016012799739837647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,2047,0.01465280055999756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,4095,0.0554144024848938
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,2047,0.014630399644374847
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,4095,0.043007999658584595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,4095,0.049318400025367734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,4095,0.04083200097084046
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,4095,0.026848000288009644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,4095,0.04158880114555359
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,4095,0.041099199652671815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,4095,0.04122560024261475
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,4095,0.020737600326538087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,4095,0.030868801474571227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,4095,0.019787199795246124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,4095,0.019896000623703003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,4095,0.02046239972114563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,8191,0.07089279890060425
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,8191,0.09475679993629456
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,8191,0.06898080110549927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,8191,0.07654079794883728
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,4095,0.018662400543689728
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,8191,0.06877279877662659
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,8191,0.06902719736099243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,8191,0.06783999800682068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,8191,0.04527679979801178
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,8191,0.03795360028743744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,8191,0.036774399876594546
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,8191,0.05129600167274475
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,8191,0.036743998527526855
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,16383,0.12955199480056762
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,8191,0.03683840036392212
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,8191,0.03701759874820709
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,16383,0.17113759517669677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,16383,0.11977280378341675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,16383,0.12198400497436523
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,16383,0.12051999568939209
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,16383,0.12162239551544189
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,16383,0.12090239524841309
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,16383,0.06988000273704528
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,16383,0.0860751986503601
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,16383,0.059680002927780154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,16383,0.06084960103034973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,16383,0.05935360193252563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,16383,0.059059202671051025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,16383,0.05939040184020996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,32767,0.23986399173736572
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,32767,0.22763519287109374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,32767,0.22780160903930663
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,32767,0.22904160022735595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,32767,0.22720320224761964
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,32767,0.12134239673614503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,32767,0.3274735927581787
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,32767,0.23038558959960936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,32767,0.10552959442138672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,32767,0.10450719594955445
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,32767,0.10479840040206909
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,32767,0.10453120470046998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,32767,0.15642399787902833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,65535,0.4687359809875488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,65535,0.4385791778564453
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,65535,0.4388607978820801
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,32767,0.10452159643173217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,65535,0.6412879943847656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,65535,0.4379263877868652
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,65535,0.4424736022949219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,65535,0.29738080501556396
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,65535,0.4369823932647705
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,65535,0.19638240337371826
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,65535,0.19479680061340332
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,65535,0.19419679641723633
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,65535,0.22810559272766112
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,65535,0.19567359685897828
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,65535,0.19460320472717285
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1,0.012600000202655792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1,0.012828800082206725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1,0.014496000111103058
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1,0.012411200255155564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1,0.011723200231790543
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1,0.012359999865293504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1,0.012049599736928939
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1,0.012059199810028075
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,3,0.014454400539398194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,3,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,3,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,3,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,3,0.014457599818706512
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,3,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,3,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,3,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,3,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,3,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,3,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,3,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,3,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,3,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,7,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,7,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,7,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,7,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,7,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,7,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,7,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,7,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,7,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,7,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,7,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,7,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,7,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,15,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,15,0.012796799838542938
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,7,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,15,0.014555199444293976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,15,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,15,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,15,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,15,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,15,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,15,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,15,0.011083199828863143
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,15,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,15,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,15,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,15,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,31,0.012580800056457519
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,31,0.01449279934167862
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,31,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,31,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,31,0.012588800489902496
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,31,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,31,0.01255040019750595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,31,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,31,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,31,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,31,0.010688000172376633
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,31,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,31,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,31,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,63,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,63,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,63,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,63,0.014604799449443817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,63,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,63,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,63,0.012620800733566284
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,63,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,63,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,63,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,63,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,63,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,63,0.010529600083827972
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,63,0.012582400441169738
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,127,0.01462399959564209
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,127,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,127,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,127,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,127,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,127,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,127,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,127,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,127,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,127,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,127,0.011003199964761734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,127,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,127,0.012612800300121307
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,127,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,255,0.014635199308395385
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,255,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,255,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,255,0.012587200105190276
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,255,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,255,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,255,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,255,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,255,0.013683199882507324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,255,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,255,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,255,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,255,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,255,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,511,0.022700800001621245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,511,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,511,0.016257600486278535
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,511,0.0165120005607605
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,511,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,511,0.015795199573040007
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,511,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,511,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,511,0.014614400267601014
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,511,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,511,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,511,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1023,0.028999999165534973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1023,0.020689600706100465
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1023,0.018615999817848207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1023,0.01863040030002594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1023,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1023,0.030987200140953065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1023,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1023,0.018559999763965607
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1023,0.022652800381183624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1023,0.016550399363040924
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1023,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1023,0.0146479994058609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1023,0.014591999351978302
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,2047,0.042263999581336975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,2047,0.05350720286369324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,2047,0.051020801067352295
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,2047,0.04376319944858551
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1023,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,2047,0.04185279905796051
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,2047,0.04132960140705109
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,2047,0.04187839925289154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,2047,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,2047,0.020627200603485107
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,2047,0.028948798775672913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,2047,0.01865759938955307
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,2047,0.01860000044107437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,2047,0.018648000061511995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,4095,0.09192320108413696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,4095,0.07850559949874877
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,2047,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,4095,0.07140319943428039
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,4095,0.06971200108528137
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,4095,0.06766240000724792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,4095,0.07029439806938172
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,4095,0.04737280011177063
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,4095,0.06944640278816223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,4095,0.04944159984588623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,4095,0.03717760145664215
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,4095,0.0367792010307312
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,4095,0.03904159963130951
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,4095,0.037084800004959104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,8191,0.1316159963607788
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,4095,0.03669120073318481
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,8191,0.12482719421386719
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,8191,0.1228111982345581
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,8191,0.16823840141296387
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,8191,0.12203999757766723
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,8191,0.07258719801902772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,8191,0.06167839765548706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,8191,0.059747201204299924
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,8191,0.12311999797821045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,8191,0.08419520258903504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,8191,0.059646397829055786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,8191,0.12315839529037476
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,8191,0.05921440124511719
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,8191,0.05933759808540344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,16383,0.24052801132202148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,16383,0.32159039974212644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,16383,0.23073599338531495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,16383,0.22915520668029785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,16383,0.12414239645004273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,16383,0.22799360752105713
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,16383,0.1066256046295166
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,16383,0.22623519897460936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,16383,0.1540495991706848
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,16383,0.2293087959289551
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,16383,0.10580159425735473
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,16383,0.10444320440292358
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,16383,0.10466239452362061
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,16383,0.10452959537506104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,32767,0.6267888069152832
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,32767,0.43714399337768556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,32767,0.47132158279418945
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,32767,0.43995041847229005
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,32767,0.43825440406799315
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,32767,0.4412047863006592
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,32767,0.43639039993286133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,32767,0.2315135955810547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,32767,0.29304640293121337
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,32767,0.19837440252304078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,32767,0.19409919977188111
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,32767,0.19510719776153565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,32767,0.19589120149612427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,32767,0.1945039987564087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1,0.01661919951438904
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1,0.012638400495052337
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1,0.012604799866676331
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1,0.016972799599170686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1,0.011953599750995636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1,0.013465599715709686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,3,0.020721599459648132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,3,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,3,0.014508800208568573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,3,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,3,0.013235199451446533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,3,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,3,0.013153600692749023
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,3,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,3,0.010948800295591355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,3,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,3,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,3,0.011416000127792359
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,3,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,3,0.012409599870443344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,7,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,7,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,7,0.013425600528717042
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,7,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,7,0.01406400054693222
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,7,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,7,0.01446399986743927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,7,0.017073599994182585
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,7,0.016329599916934966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,7,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,7,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,7,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,7,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,15,0.020662400126457214
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,7,0.011462400108575821
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,15,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,15,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,15,0.014342400431632995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,15,0.014472000300884247
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,15,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,15,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,15,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,15,0.013631999492645264
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,15,0.011287999898195266
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,15,0.017448000609874725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,15,0.011364799737930299
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,15,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,31,0.01658879965543747
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,31,0.020763200521469117
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,15,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,31,0.014532800018787383
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,31,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,31,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,31,0.014451199769973755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,31,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,31,0.018033599853515624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,31,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,31,0.014505599439144135
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,31,0.01088479980826378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,63,0.020652799308300017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,31,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,31,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,31,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,63,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,63,0.016582399606704712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,63,0.014452800154685974
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,63,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,63,0.014480000734329224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,63,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,63,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,63,0.018292799592018127
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,63,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,63,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,63,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,63,0.016524800658226015
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,63,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,127,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,127,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,127,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,127,0.020660799741744996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,127,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,127,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,127,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,127,0.01723040044307709
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,127,0.016545599699020384
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,127,0.01454080045223236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,127,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,127,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,127,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,127,0.01101280003786087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,255,0.01870879977941513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,255,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,255,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,255,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,255,0.01830720007419586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,255,0.013915200531482697
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,255,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,255,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,255,0.01451839953660965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,255,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,255,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,255,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,255,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,511,0.026878398656845093
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,255,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,511,0.018622399866580965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,511,0.017566399276256563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,511,0.018587200343608855
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,511,0.033302399516105655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,511,0.02030559927225113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,511,0.018214400112628936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,511,0.020716799795627593
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,511,0.01860000044107437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,511,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,511,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,511,0.014455999433994293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1023,0.05491999983787536
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1023,0.04147039949893951
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1023,0.04856640100479126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1023,0.04126079976558685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,511,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,511,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1023,0.040987199544906615
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1023,0.039735999703407285
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1023,0.027028799057006836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1023,0.018697600066661834
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1023,0.031615999341011045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1023,0.016612799465656282
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1023,0.03994719982147217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1023,0.018505600094795228
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1023,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,2047,0.07589439749717712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1023,0.01857600063085556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,2047,0.0951903998851776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,2047,0.06828799843788147
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,2047,0.06716160178184509
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,2047,0.06814720034599304
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,2047,0.0667743980884552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,2047,0.03712640106678009
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,2047,0.045465600490570066
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,2047,0.06810879707336426
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,2047,0.03524799942970276
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,2047,0.053414398431777955
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,2047,0.03682880103588104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,2047,0.03659360110759735
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,2047,0.03545919954776764
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,4095,0.12896000146865844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,4095,0.1724063992500305
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,4095,0.1210096001625061
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,4095,0.0707535982131958
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,4095,0.11985759735107422
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,4095,0.11913440227508545
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,4095,0.12186239957809449
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,4095,0.0607200026512146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,4095,0.12019360065460205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,4095,0.08812639713287354
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,4095,0.05868160128593445
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,4095,0.05925120115280151
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,4095,0.05818880200386047
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,4095,0.058830398321151736
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,8191,0.3251424074172974
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,8191,0.23773760795593263
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,8191,0.2226032018661499
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,8191,0.2269711971282959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,8191,0.2262415885925293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,8191,0.22544000148773194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,8191,0.226910400390625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,8191,0.12200640439987183
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,8191,0.1583135962486267
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,8191,0.1037600040435791
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,8191,0.10386879444122314
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,8191,0.10377600193023681
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,8191,0.10584959983825684
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,8191,0.10386559963226319
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,16383,0.46283841133117676
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,16383,0.6297423839569092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,16383,0.4399007797241211
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,16383,0.4340832233428955
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,16383,0.23027520179748534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,16383,0.4332911968231201
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,16383,0.297163200378418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,16383,0.19430400133132936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,16383,0.19547040462493898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,16383,0.43224802017211916
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,16383,0.1935520052909851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1,0.03110080063343048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,16383,0.4357952117919922
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,16383,0.19363839626312257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,16383,0.19303200244903565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1,0.020596800744533537
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1,0.020059199631214143
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1,0.024758400022983552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1,0.020713600516319274
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1,0.018724800646305086
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1,0.02282720059156418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1,0.026811200380325317
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1,0.014804799854755402
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1,0.01852000057697296
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,3,0.024809600412845613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,3,0.03124319911003113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,3,0.020665599405765532
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1,0.015011200308799743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,3,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,3,0.018691200017929076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,3,0.023203200101852416
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,3,0.019875200092792512
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,3,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,3,0.016468800604343414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,3,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,3,0.01871040016412735
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,3,0.014609600603580474
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,3,0.015702399611473083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,7,0.032446399331092834
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,3,0.01573760062456131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,7,0.020660799741744996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,7,0.024843199551105498
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,7,0.018751999735832213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,7,0.022806400060653688
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,7,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,7,0.01966399997472763
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,7,0.017164799571037292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,7,0.01886879950761795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,7,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,7,0.026366400718688964
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,7,0.01613280028104782
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,7,0.014572800695896148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,7,0.016356800496578217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,15,0.02481919974088669
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,15,0.03210560083389282
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,15,0.020695999264717102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,15,0.020665599405765532
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,15,0.023209600150585173
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,15,0.018464000523090364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,15,0.019630399346351624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,15,0.026481598615646362
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,15,0.0191551998257637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,15,0.01982239931821823
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,15,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,15,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,15,0.01478399932384491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,31,0.02479359954595566
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,31,0.031753599643707275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,31,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,15,0.016521599888801575
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,31,0.020475199818611144
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,31,0.019067199528217317
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,31,0.020689600706100465
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,31,0.023052799701690673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,31,0.026545599102973938
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,31,0.020017600059509276
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,31,0.018670399487018586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,31,0.016463999450206757
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,31,0.015401600301265717
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,63,0.02481919974088669
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,63,0.03138880133628845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,31,0.016531200706958772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,31,0.016582399606704712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,63,0.020411199331283568
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,63,0.020552000403404234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,63,0.01905599981546402
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,63,0.020207999646663664
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,63,0.022758400440216063
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,63,0.016596800088882445
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,63,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,63,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,63,0.018624000251293182
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,63,0.02677760124206543
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,63,0.020729599893093108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,63,0.0151296004652977
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,127,0.026451200246810913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,127,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,127,0.03297759890556336
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,127,0.020764799416065217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,127,0.01866399943828583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,127,0.023611199855804444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,127,0.020472000539302825
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,127,0.02680320143699646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,127,0.018649600446224213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,127,0.016436800360679626
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,127,0.02004159986972809
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,127,0.01480800062417984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,127,0.016200000047683717
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,255,0.03691839873790741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,255,0.019091199338436126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,255,0.022526399791240694
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,255,0.03741439878940582
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,255,0.019942399859428406
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,255,0.0193231999874115
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,127,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,255,0.026940798759460448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,255,0.020326399803161622
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,255,0.025974398851394652
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,255,0.018620799481868743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,255,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,255,0.015801599621772765
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,511,0.05368959903717041
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,511,0.043665599822998044
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,511,0.042737600207328794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,255,0.01663679927587509
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,511,0.0596448004245758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,511,0.041403201222419736
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,511,0.041361600160598755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,511,0.041464000940322876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,255,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,511,0.03835040032863617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,511,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,511,0.019707199931144715
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,511,0.020623999834060668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,511,0.020681600272655486
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,511,0.020703999698162077
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1023,0.09845920205116272
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,511,0.023127999901771546
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1023,0.0705680012702942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1023,0.06773759722709656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1023,0.06916800141334534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1023,0.06792320013046264
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1023,0.08454560041427613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1023,0.05267360210418701
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1023,0.0678160011768341
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1023,0.03955360054969788
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1023,0.03711999952793121
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1023,0.03714079856872558
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1023,0.057183998823165896
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1023,0.03697440028190613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,2047,0.13722879886627198
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,2047,0.17645440101623536
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,2047,0.12202880382537842
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,2047,0.11984800100326538
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1023,0.03687199950218201
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,2047,0.11837760210037232
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,2047,0.12036639451980591
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,2047,0.0802287995815277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,2047,0.11838879585266113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,2047,0.061615997552871705
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,2047,0.06392959952354431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,2047,0.09263520240783692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,2047,0.06034079790115356
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,2047,0.06065599918365479
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,4095,0.24481759071350098
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,2047,0.06151840090751648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,4095,0.2251807928085327
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,4095,0.22402079105377198
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,4095,0.3294064044952393
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,4095,0.22339038848876952
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,4095,0.22185759544372557
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,4095,0.1314687967300415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,4095,0.22464640140533448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,4095,0.11106719970703124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,4095,0.1076464056968689
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,4095,0.1631711959838867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,4095,0.10647519826889038
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,4095,0.10587040185928345
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,8191,0.4650928020477295
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,4095,0.106876802444458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,8191,0.4332287788391113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,8191,0.6302656173706055
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,8191,0.4384768009185791
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,8191,0.4297599792480469
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,8191,0.43181281089782714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,8191,0.23826560974121094
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,8191,0.20118720531463624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,8191,0.43117599487304686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,8191,0.19680000543594361
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,8191,0.30451838970184325
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,8191,0.19622880220413208
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,8191,0.1980784058570862
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1,0.045212799310684205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1,0.052934402227401735
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1,0.029931199550628663
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1,0.03298720121383667
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,8191,0.1958896040916443
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1,0.028934401273727418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1,0.029716798663139345
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1,0.03916000127792359
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1,0.028830400109291075
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1,0.04341599941253662
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1,0.022686399519443512
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,3,0.04523519873619079
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1,0.022785599529743194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,3,0.05310400128364563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1,0.022884799540042876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,3,0.03296479880809784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,3,0.028907200694084166
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,3,0.030803200602531434
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,3,0.028883200883865357
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,3,0.039323198795318606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,3,0.043823999166488645
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,3,0.029640001058578492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,3,0.024491199851036073
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,3,0.022912000119686127
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,3,0.022734400629997254
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,3,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,3,0.02680160105228424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,7,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,7,0.02954559922218323
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,7,0.030928000807762146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,7,0.053251200914382936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,7,0.029203200340270997
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,7,0.045307201147079465
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,7,0.03296320140361786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,7,0.039087998867034915
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,7,0.04421600103378296
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,7,0.026888000965118408
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,7,0.024694399535655977
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,7,0.022785599529743194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,7,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,7,0.023179200291633607
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,15,0.033057600259780884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,15,0.04534879922866821
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,15,0.030691200494766237
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,15,0.029548799991607665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,15,0.030982398986816408
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,15,0.05356799960136414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,15,0.02953760027885437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,15,0.04473919868469238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,15,0.039540800452232364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,15,0.02369280010461807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,15,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,15,0.022915199398994446
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,15,0.02691200077533722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,31,0.04663839936256409
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,31,0.05380799770355225
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,15,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,31,0.030873599648475646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,31,0.029151999950408937
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,31,0.032996800541877744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,31,0.030947199463844298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,31,0.044537600874900815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,31,0.024033600091934205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,31,0.04097760021686554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,31,0.030460798740386964
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,31,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,31,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,31,0.02277279943227768
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,31,0.022864000499248506
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,63,0.04604319930076599
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,63,0.055567997694015506
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,63,0.0329584002494812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,63,0.029827201366424562
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,63,0.031009599566459656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,63,0.03095200061798096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,63,0.044809600710868834
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,63,0.02941280007362366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,63,0.026862400770187377
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,63,0.041201600432395936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,63,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,63,0.023019200563430785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,63,0.022767999768257143
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,127,0.0488864004611969
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,127,0.035051199793815616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,63,0.022920000553131103
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,127,0.030947199463844298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,127,0.030108800530433653
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,127,0.0432671993970871
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,127,0.03089759945869446
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,127,0.029475200176239013
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,127,0.045238399505615236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,127,0.057550400495529175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,127,0.02274879962205887
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,127,0.02309119999408722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,127,0.02705279886722565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,127,0.02481119930744171
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,255,0.061540800333023074
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,127,0.022819200158119203
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,255,0.06579520106315613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,255,0.04291520118713379
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,255,0.04191839993000031
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,255,0.04129759967327118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,255,0.04731999933719635
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,255,0.04205760061740875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,255,0.0514735996723175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,255,0.02890239953994751
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,255,0.04726879894733429
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,255,0.022750400006771088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,511,0.09895840287208557
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,255,0.024784000217914583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,255,0.023470400273799895
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,511,0.07571200132369996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,511,0.0958736002445221
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,255,0.022804799675941467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,511,0.07203360199928284
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,511,0.07100319862365723
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,511,0.07027040123939514
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,511,0.039105600118637084
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,511,0.06557120084762573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,511,0.06147199869155884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,511,0.06907359957695007
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,511,0.04415520131587982
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,511,0.04065119922161102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,511,0.03898400068283081
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1023,0.14989919662475587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,511,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1023,0.12697279453277588
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1023,0.12280960083007812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1023,0.16994719505310057
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1023,0.12046560049057006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1023,0.12216800451278687
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1023,0.12096320390701294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1023,0.09228159785270691
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1023,0.06786239743232728
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1023,0.0617904007434845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1023,0.062483197450637816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1023,0.09510400295257568
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1023,0.06154400110244751
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,2047,0.3117919921875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,2047,0.2594399929046631
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1023,0.0636575996875763
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,2047,0.23107678890228273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,2047,0.2263871908187866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,2047,0.14432640075683595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,2047,0.2246000051498413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,2047,0.2234208106994629
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,2047,0.15987520217895507
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,2047,0.22372000217437743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,2047,0.1109279990196228
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,2047,0.10887360572814941
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,2047,0.1094704031944275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,2047,0.10820480585098266
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,4095,0.48781599998474123
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,4095,0.5812272071838379
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,2047,0.1152575969696045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,4095,0.43227200508117675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,4095,0.4315904140472412
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,4095,0.4315455913543701
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,4095,0.44017601013183594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,4095,0.2011104106903076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,4095,0.2022655963897705
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,4095,0.1999824047088623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,4095,0.2873392105102539
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,4095,0.42961759567260743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,4095,0.2554368019104004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,4095,0.20863039493560792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,4095,0.1992303967475891
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1,0.07839199900627136
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1,0.09989439845085143
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1,0.054364800453186035
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1,0.04884159862995148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1,0.04963200092315674
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1,0.049404799938201904
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1,0.048895999789237976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1,0.08014559745788574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1,0.04533120095729828
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1,0.03915199935436249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1,0.07268159985542297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1,0.03733119964599609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1,0.03704000115394592
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,3,0.10041279792785644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,3,0.07845439910888671
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,3,0.05357760190963745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1,0.03716799914836884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,3,0.04951840043067932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,3,0.04839200079441071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,3,0.04888319969177246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,3,0.0724511981010437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,3,0.0491344004869461
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,3,0.04525279998779297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,3,0.03718400001525879
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,3,0.08013120293617249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,3,0.03714880049228668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,3,0.039131200313568114
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,7,0.07867839932441711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,3,0.03703519999980927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,7,0.05388000011444092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,7,0.050108802318572995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,7,0.10004479885101318
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,7,0.049132800102233885
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,7,0.07298240065574646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,7,0.049300798773765565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,7,0.049358400702476504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,7,0.045388799905776975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,7,0.0800927996635437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,7,0.03735359907150269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,7,0.03713760077953339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,7,0.03914720118045807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,15,0.10059679746627807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,7,0.03710240125656128
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,15,0.05533120036125183
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,15,0.07831839919090271
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,15,0.04938240051269531
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,15,0.04935039877891541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,15,0.04943679869174957
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,15,0.04919199943542481
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,15,0.07355679869651795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,15,0.04556480050086975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,15,0.037171199917793274
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,15,0.03713119924068451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,15,0.08054720163345337
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,15,0.03914400041103363
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,15,0.03701600134372711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,31,0.10055040121078491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,31,0.05616000294685364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,31,0.049833598732948306
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,31,0.049307200312614444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,31,0.07814559936523438
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,31,0.04939840137958527
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,31,0.08185279965400696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,31,0.04936800003051758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,31,0.04577760100364685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,31,0.03918879926204681
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,31,0.03767519891262054
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,31,0.07397440075874329
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,31,0.03720319867134094
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,63,0.07980319857597351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,63,0.05026559829711914
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,63,0.05154399871826172
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,63,0.05839040279388428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,63,0.10097600221633911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,63,0.049617600440979
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,63,0.07417119741439819
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,63,0.0829904019832611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,63,0.04940159916877747
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,63,0.045638400316238406
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,31,0.03711360096931458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,63,0.039233601093292235
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,63,0.0371535986661911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,63,0.03758879899978638
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,63,0.03712159991264343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,127,0.08861920237541199
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,127,0.06357759833335877
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,127,0.1022271990776062
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,127,0.05546720027923584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,127,0.07396159768104553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,127,0.08231199979782104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,127,0.05757279992103577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,127,0.05523359775543213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,127,0.04942240118980408
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,127,0.05534719824790955
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,127,0.03768480122089386
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,127,0.04120000004768372
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,127,0.03712800145149231
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,127,0.03713760077953339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,255,0.07634720206260681
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,255,0.10971360206604004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,255,0.06962080001831054
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,255,0.07119200229644776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,255,0.11318880319595337
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,255,0.06732800006866455
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,255,0.06886399984359741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,255,0.08011839985847473
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,255,0.08808000087738037
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,255,0.05137760043144226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,255,0.04469760060310364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,255,0.04139359891414642
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,255,0.04333919882774353
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,511,0.18397599458694458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,511,0.12986719608306885
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,511,0.1666000008583069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,255,0.04151679873466492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,511,0.11997120380401612
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,511,0.11893600225448608
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,511,0.12373600006103516
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,511,0.10884480476379395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,511,0.10959039926528931
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,511,0.11805440187454223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,511,0.07232800126075745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,511,0.06265760064125062
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,511,0.06376479864120484
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1023,0.27041919231414796
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1023,0.32728478908538816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,511,0.06568959951400757
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,511,0.06211199760437012
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1023,0.2176975965499878
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1023,0.22624640464782714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1023,0.21485600471496583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1023,0.2128351926803589
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1023,0.2116528034210205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1023,0.15914880037307738
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1023,0.10686559677124023
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1023,0.11049120426177979
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1023,0.11742880344390869
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1023,0.10523040294647217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1023,0.17604320049285888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1023,0.10472320318222046
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,2047,0.48226399421691896
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,2047,0.6087024211883545
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,2047,0.42269439697265626
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,2047,0.40775041580200194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,2047,0.4129551887512207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,2047,0.2673583984375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,2047,0.4093183994293213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,2047,0.2063904047012329
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,2047,0.3046911954879761
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,2047,0.40785441398620603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,2047,0.19447040557861328
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,2047,0.1919535994529724
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,2047,0.19844479560852052
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,2047,0.19278719425201415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1,0.010689599812030793
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1,0.0084927998483181
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1,0.0084927998483181
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1,0.008416000008583068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,3,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,3,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,3,0.011052799969911575
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,3,0.010382399708032609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,3,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,3,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,3,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,3,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,3,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,3,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,3,0.008455999940633774
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,3,0.008718399703502655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,7,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,3,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,3,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,7,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,7,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,7,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,7,0.009399999678134919
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,7,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,7,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,7,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,7,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,7,0.008414400368928909
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,7,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,7,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,7,0.008481600135564805
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,7,0.008523199707269669
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,15,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,15,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,15,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,15,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,15,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,15,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,15,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,15,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,15,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,15,0.00944959968328476
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,15,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,15,0.008449599891901017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,15,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,31,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,31,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,31,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,15,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,31,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,31,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,31,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,31,0.010355199873447418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,31,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,31,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,31,0.008428800106048583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,31,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,31,0.009460800141096116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,31,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,63,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,63,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,63,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,63,0.01098719984292984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,63,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,63,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,63,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,63,0.00936800017952919
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,63,0.01021760031580925
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,63,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,63,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,63,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,127,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,63,0.008401600271463394
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,127,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,63,0.010340800136327743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,127,0.010452800244092942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,127,0.0110944002866745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,127,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,127,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,127,0.009379199892282485
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,127,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,127,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,127,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,255,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,127,0.008459199965000153
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,255,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,127,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,255,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,127,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,127,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,255,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,255,0.01043199971318245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,255,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,255,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,255,0.010564800351858139
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,255,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,255,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,255,0.008462399989366532
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,255,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,255,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,255,0.008451200276613235
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,511,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,511,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,511,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,511,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,511,0.011588799953460693
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,511,0.012904000282287598
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,511,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,511,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,511,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,511,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,511,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,511,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,511,0.010599999874830245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1023,0.015039999783039094
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1023,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,511,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1023,0.012582400441169738
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1023,0.014606399834156037
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1023,0.01629440039396286
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1023,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1023,0.014577600359916686
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1023,0.014567999541759491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1023,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1023,0.014257599413394929
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1023,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,2047,0.01669600009918213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1023,0.013870400190353394
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1023,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1023,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,2047,0.015721599757671356
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,2047,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,2047,0.013337600231170654
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,2047,0.014593599736690522
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,2047,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,2047,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,2047,0.01475680023431778
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,2047,0.014366400241851807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,2047,0.014455999433994293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,2047,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,2047,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,2047,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,4095,0.02136159986257553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,4095,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,2047,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,4095,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,4095,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,4095,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,4095,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,4095,0.017715199291706084
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,4095,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,4095,0.015067200362682342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,4095,0.014545600116252898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,4095,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,8191,0.026398399472236635
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,4095,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,8191,0.024564799666404725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,4095,0.014459200203418732
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,4095,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,8191,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,8191,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,8191,0.016572800278663636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,8191,0.01462240070104599
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,8191,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,8191,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,8191,0.014560000598430633
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,8191,0.01467519998550415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,8191,0.018675200641155243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,8191,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,8191,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,8191,0.01664000004529953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,16383,0.03918400108814239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,16383,0.0268095999956131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,16383,0.024883200228214265
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,16383,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,16383,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,16383,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,16383,0.02072480022907257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,16383,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,16383,0.026859200000762938
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,16383,0.016683200001716615
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,16383,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,16383,0.02020000070333481
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,16383,0.01860480010509491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,32767,0.0330128014087677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,16383,0.0181551992893219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,32767,0.04020639955997467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,32767,0.02871679961681366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,32767,0.030937600135803222
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,32767,0.026833599805831908
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,32767,0.030364799499511718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,32767,0.02688640058040619
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,32767,0.026795199513435362
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,32767,0.023099200427532197
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,32767,0.02287199944257736
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,32767,0.022683200240135194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,32767,0.026843199133872987
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,32767,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,32767,0.022716799378395082
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,65535,0.04318079948425293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,65535,0.03511199951171875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,65535,0.03505600094795227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,65535,0.039124798774719236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,65535,0.03792639970779419
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,65535,0.034806400537490845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,65535,0.03424319922924042
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,65535,0.03304159939289093
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,65535,0.0311024010181427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,65535,0.02852639853954315
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,65535,0.028934401273727418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,131071,0.05988479852676391
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,65535,0.02874079942703247
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,131071,0.07111200094223022
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,65535,0.02688319981098175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,65535,0.026855999231338502
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,131071,0.05568159818649292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,131071,0.05345919728279114
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,131071,0.052215999364852904
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,131071,0.0544048011302948
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,131071,0.04124319851398468
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,131071,0.05535200238227844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,131071,0.03380320072174072
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,131071,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,131071,0.03296799957752228
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,131071,0.03297759890556336
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,131071,0.04282079935073853
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,131071,0.033344000577926636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1,0.011392000317573547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1,0.010611200332641601
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1,0.00941760018467903
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1,0.009428799897432328
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,3,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,3,0.010777600109577179
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,3,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,3,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,3,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,3,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,3,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,3,0.008507200330495835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,3,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,3,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,3,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,3,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,3,0.010180799663066864
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,7,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,7,0.010553599894046783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,7,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,7,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,7,0.010552000254392624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,7,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,7,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,7,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,7,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,7,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,7,0.010896000266075134
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,15,0.010542400181293488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,7,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,7,0.009692800045013428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,15,0.012080000340938568
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,7,0.00950239971280098
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,15,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,15,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,15,0.01037440001964569
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,15,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,15,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,15,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,15,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,15,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,15,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,15,0.008436799794435502
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,15,0.008417599648237229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,15,0.009644799679517747
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,31,0.010556799918413162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,31,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,31,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,31,0.01194240003824234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,31,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,31,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,31,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,31,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,31,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,31,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,31,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,31,0.008455999940633774
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,31,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,31,0.008544000238180161
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,63,0.010542400181293488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,63,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,63,0.010652799904346467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,63,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,63,0.010649599879980088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,63,0.01067200005054474
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,63,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,63,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,63,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,63,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,63,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,63,0.008718399703502655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,63,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,127,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,127,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,127,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,127,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,127,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,63,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,127,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,127,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,127,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,127,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,127,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,127,0.008534400165081025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,127,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,255,0.010521599650382995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,127,0.008430399745702744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,255,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,127,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,255,0.010543999820947647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,255,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,255,0.010592000186443329
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,255,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,255,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,255,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,255,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,255,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,255,0.009084799885749817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,255,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,511,0.012574400007724761
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,255,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,255,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,511,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,511,0.012809599936008453
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,511,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,511,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,511,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,511,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,511,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,511,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,511,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,511,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,511,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1023,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1023,0.015035200119018554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1023,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1023,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1023,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1023,0.013067199289798737
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,511,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1023,0.014564800262451171
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1023,0.012934400141239167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1023,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1023,0.012433599680662155
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1023,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1023,0.012057600170373916
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1023,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,2047,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,2047,0.014528000354766845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,2047,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,2047,0.0125231996178627
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1023,0.011326400190591812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,2047,0.01661760061979294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,2047,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,2047,0.012556800246238708
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,2047,0.014596800506114959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,2047,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,2047,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,2047,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,2047,0.012409599870443344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,4095,0.020638400316238405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,4095,0.018587200343608855
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,2047,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,4095,0.017451199889183044
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,2047,0.011072000116109848
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,4095,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,4095,0.013447999954223633
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,4095,0.012559999525547028
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,4095,0.01666879951953888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,4095,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,4095,0.014630399644374847
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,4095,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,4095,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,4095,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,8191,0.030976000428199767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,4095,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,4095,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,8191,0.018688000738620758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,8191,0.018694399297237395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,8191,0.01663679927587509
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,8191,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,8191,0.016680000722408293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,8191,0.02138720005750656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,8191,0.01656000018119812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,8191,0.014646400511264802
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,8191,0.02276639938354492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,8191,0.014480000734329224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,8191,0.01659200042486191
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,8191,0.01783200055360794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,16383,0.030988800525665283
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,8191,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,16383,0.02422879934310913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,16383,0.02072799950838089
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,16383,0.026980799436569215
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,16383,0.020670400559902193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,16383,0.022448000311851502
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,16383,0.020734399557113647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,16383,0.02484000027179718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,16383,0.01870879977941513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,16383,0.018588800728321076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,16383,0.017667199671268462
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,16383,0.022806400060653688
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,32767,0.037099200487136844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,16383,0.018580800294876097
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,32767,0.03549439907073974
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,16383,0.018012799322605133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,32767,0.028347200155258177
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,32767,0.026796799898147584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,32767,0.03285279870033264
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,32767,0.026134398579597474
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,32767,0.030926400423049928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,32767,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,32767,0.02687999904155731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,32767,0.026807999610900878
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,32767,0.02272160053253174
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,32767,0.028683200478553772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,32767,0.022726400196552275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,65535,0.04955039918422699
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,65535,0.05737599730491638
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,65535,0.05159519910812378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,65535,0.051507198810577394
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,32767,0.02282080054283142
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,65535,0.053780800104141234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,65535,0.03697920143604279
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,65535,0.035046398639678955
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,65535,0.056764799356460574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,65535,0.05305600166320801
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,65535,0.03298879861831665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,65535,0.03091999888420105
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,65535,0.029473599791526795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,131071,0.07621920108795166
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,65535,0.030934399366378783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,131071,0.08442080020904541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,131071,0.0923904001712799
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,65535,0.030913600325584413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,131071,0.07671679854393006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,131071,0.07827039957046508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,131071,0.07912160158157348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,131071,0.0802287995815277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,131071,0.047393599152565004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,131071,0.04767520129680634
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,131071,0.045363199710845944
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,131071,0.053492802381515506
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,131071,0.05173280239105225
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,131071,0.05158720016479492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,131071,0.046312001347541806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1,0.010708799958229065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1,0.010644800215959548
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1,0.010555200278759003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1,0.010361599922180175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,3,0.01255040019750595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,3,0.011648000031709672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,3,0.010553599894046783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,3,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,3,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,3,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,3,0.01242239996790886
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,3,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,3,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,3,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,3,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,3,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,3,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,3,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,7,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,7,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,7,0.010840000212192535
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,7,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,7,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,7,0.01072319969534874
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,7,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,7,0.01250080019235611
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,7,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,7,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,7,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,7,0.0089199997484684
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,7,0.010369600355625152
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,15,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,15,0.01170400008559227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,15,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,15,0.010550399869680404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,15,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,7,0.010153599828481675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,15,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,15,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,15,0.011958400160074234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,15,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,15,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,31,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,15,0.008870399743318557
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,15,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,31,0.0107744000852108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,15,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,15,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,31,0.010628800094127654
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,31,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,31,0.010446400195360184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,31,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,31,0.010943999886512757
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,31,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,31,0.01003199964761734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,63,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,31,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,31,0.01043040007352829
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,63,0.010907199978828431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,63,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,63,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,63,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,63,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,63,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,63,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,63,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,63,0.011086399853229522
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,63,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,63,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,63,0.010387200117111205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,63,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,127,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,127,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,127,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,127,0.011379200220108032
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,127,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,127,0.010440000146627427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,127,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,127,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,127,0.010846400260925293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,127,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,127,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,127,0.009355200082063675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,127,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,127,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,255,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,255,0.011436799913644791
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,255,0.010548800230026245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,255,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,255,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,255,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,255,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,255,0.01048479974269867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,255,0.010545600205659866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,255,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,255,0.010395199805498124
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,255,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,511,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,255,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,511,0.013055999577045441
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,255,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,511,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,511,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,511,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,511,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,511,0.012936000525951386
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,511,0.012547199428081513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,511,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,511,0.010593599826097488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,511,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,511,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1023,0.016022400557994844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1023,0.01446560025215149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,511,0.01056160032749176
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1023,0.013502399623394012
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1023,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1023,0.013793599605560303
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1023,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1023,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1023,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1023,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1023,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1023,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1023,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,2047,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1023,0.012577599287033081
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1023,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,2047,0.016857600212097167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,2047,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,2047,0.014025600254535675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,2047,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,2047,0.013065600395202636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,2047,0.012905600666999816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,2047,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,2047,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,2047,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,2047,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,2047,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,4095,0.028476798534393312
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,2047,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,2047,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,4095,0.020659199357032774
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,4095,0.01867839992046356
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,4095,0.01675039976835251
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,4095,0.016574400663375854
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,4095,0.016601599752902985
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,4095,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,4095,0.016516800224781036
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,4095,0.014588800072669984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,4095,0.01656640022993088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,4095,0.016622400283813475
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,4095,0.014484800398349762
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,4095,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,4095,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,8191,0.03086720108985901
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,8191,0.024779200553894043
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,8191,0.021060800552368163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,8191,0.018614399433135986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,8191,0.018641600012779237
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,8191,0.021886399388313292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,8191,0.021857599914073943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,8191,0.01863359957933426
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,8191,0.018111999332904815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,8191,0.018646399676799773
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,8191,0.01658399999141693
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,8191,0.015035200119018554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,8191,0.01656640022993088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,8191,0.01655679941177368
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,16383,0.030979201197624207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,16383,0.035051199793815616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,16383,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,16383,0.022814400494098663
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,16383,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,16383,0.02369280010461807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,16383,0.026655998826026917
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,16383,0.030323201417922975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,16383,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,16383,0.020270399749279022
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,16383,0.03158240020275116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,16383,0.018691200017929076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,16383,0.018692800402641298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,32767,0.048619198799133304
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,32767,0.057739198207855225
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,32767,0.047660800814628604
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,16383,0.01867839992046356
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,32767,0.04881280064582825
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,32767,0.047075200080871585
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,32767,0.052939200401306154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,32767,0.03356159925460815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,32767,0.033022400736808774
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,32767,0.04730400145053863
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,32767,0.032995200157165526
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,32767,0.024878400564193725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,32767,0.026868799328804018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,32767,0.024817599356174468
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,65535,0.07457919716835022
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,65535,0.07346559762954712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,65535,0.09611039757728576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,32767,0.02481279969215393
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,65535,0.07197279930114746
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,65535,0.0801584005355835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,65535,0.07238720059394836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,65535,0.049374398589134214
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,65535,0.048665601015090945
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,65535,0.0724943995475769
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,65535,0.052662402391433716
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,65535,0.04154880046844482
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,65535,0.042078399658203126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,131071,0.13201760053634642
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,131071,0.13118239641189575
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,131071,0.12443039417266846
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,65535,0.04203200042247772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,65535,0.04193600118160248
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,131071,0.16384639739990234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,131071,0.1233247995376587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,131071,0.12444159984588624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,131071,0.12568800449371337
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,131071,0.08010560274124146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,131071,0.07188159823417664
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,131071,0.06408320069313049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,131071,0.06362400054931641
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,131071,0.06365280151367188
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,131071,0.08405920267105102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,131071,0.064547199010849
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1,0.010926400125026704
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1,0.010564800351858139
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1,0.010609599947929382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1,0.012537600100040435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1,0.0105103999376297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,3,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,3,0.010649599879980088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,3,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,3,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,3,0.01043360009789467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,3,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,3,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,3,0.0117807999253273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,3,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,3,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,3,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,3,0.010391999781131745
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,7,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,7,0.012569600343704223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,7,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,3,0.010371199995279311
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,7,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,7,0.01037919968366623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,7,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,7,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,7,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,7,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,7,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,7,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,7,0.010425599664449692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,7,0.010406400263309478
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,15,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,15,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,15,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,15,0.010526400059461594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,15,0.012435200065374375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,7,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,15,0.010524799674749374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,15,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,15,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,15,0.010372799634933472
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,15,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,15,0.010367999970912933
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,15,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,15,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,31,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,15,0.010468800365924836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,31,0.010982400178909302
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,31,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,31,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,31,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,31,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,31,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,31,0.010396800190210342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,31,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,31,0.010496000200510025
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,31,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,31,0.01045600026845932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,31,0.010358399897813796
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,63,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,31,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,63,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,63,0.010527999699115753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,63,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,63,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,63,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,63,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,63,0.011494400352239609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,63,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,63,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,63,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,63,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,63,0.010412800312042236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,127,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,127,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,127,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,63,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,127,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,127,0.01077279970049858
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,127,0.010507199913263321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,127,0.010390400141477584
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,127,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,127,0.011608000099658965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,127,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,127,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,127,0.010385599732398988
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,127,0.0104592002928257
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,255,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,255,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,255,0.010847999900579452
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,255,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,127,0.010416000336408614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,255,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,255,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,255,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,255,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,255,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,255,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,255,0.010465600341558457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,255,0.010417599976062775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,255,0.010409600287675857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,511,0.013711999356746673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,511,0.015609599649906158
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,511,0.012577599287033081
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,511,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,255,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,511,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,511,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,511,0.013433599472045898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,511,0.011660800129175187
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,511,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,511,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,511,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,511,0.010836800187826156
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,511,0.01053759977221489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,511,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1023,0.016540800034999848
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1023,0.014497600495815277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1023,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1023,0.01448799967765808
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1023,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1023,0.014526399970054626
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1023,0.013788799941539764
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1023,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1023,0.012891200184822083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1023,0.0124208003282547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1023,0.01653759926557541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1023,0.016595199704170227
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,2047,0.027004799246788024
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,2047,0.018700799345970152
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,2047,0.0166143998503685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,2047,0.020684799551963805
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1023,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1023,0.012427199631929398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,2047,0.015360000729560851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,2047,0.016564799845218657
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,2047,0.020638400316238405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,2047,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,2047,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,2047,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,2047,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,2047,0.016599999368190767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,4095,0.023019200563430785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,2047,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,4095,0.028854399919509888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,2047,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,4095,0.018628799915313722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,4095,0.018651199340820313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,4095,0.02067680060863495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,4095,0.016707199811935424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,4095,0.020798400044441223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,4095,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,4095,0.016547200083732606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,4095,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,4095,0.0176144003868103
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,4095,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,4095,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,8191,0.029329600930213928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,8191,0.029659199714660644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,4095,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,8191,0.03319199979305267
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,8191,0.020763200521469117
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,8191,0.020628799498081208
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,8191,0.025216001272201537
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,8191,0.024846400320529937
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,8191,0.02134079933166504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,8191,0.02221119999885559
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,8191,0.01770720034837723
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,8191,0.016606399416923524
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,8191,0.01666239947080612
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,8191,0.022759999334812164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,8191,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,16383,0.0472784012556076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,16383,0.045291200280189514
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,16383,0.04681920111179352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,16383,0.045332801342010495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,16383,0.05018720030784607
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,16383,0.030777600407600404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,16383,0.033228799700737
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,16383,0.056704002618789676
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,16383,0.04391359984874725
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,16383,0.03136320114135742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,16383,0.02290080040693283
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,16383,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,16383,0.022735999524593355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,32767,0.07489280104637146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,32767,0.09562399983406067
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,16383,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,32767,0.07262399792671204
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,32767,0.08002719879150391
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,32767,0.07061120271682739
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,32767,0.047651201486587524
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,32767,0.07268639802932739
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,32767,0.07231199741363525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,32767,0.04120000004768372
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,32767,0.04099999964237213
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,32767,0.046367999911308286
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,32767,0.05132799744606018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,32767,0.039860799908638
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,32767,0.03925440013408661
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,65535,0.16301440000534057
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,65535,0.12597279548645018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,65535,0.13104000091552734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,65535,0.12561919689178466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,65535,0.12553759813308715
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,65535,0.1307744026184082
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,65535,0.1260208010673523
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,65535,0.08289920091629029
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,65535,0.06997920274734497
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,65535,0.078847998380661
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,65535,0.06369600296020508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,65535,0.06348959803581238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,131071,0.24710240364074706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,131071,0.30333919525146485
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,131071,0.24729280471801757
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,131071,0.2278287887573242
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,65535,0.0629967987537384
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,65535,0.0627344012260437
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,131071,0.2245935916900635
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,131071,0.22966558933258058
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,131071,0.14857280254364014
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,131071,0.11684639453887939
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,131071,0.1454192042350769
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,131071,0.22298879623413087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,131071,0.10550880432128906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,131071,0.1049839973449707
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,131071,0.10679039955139161
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,131071,0.10531519651412964
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1,0.012435200065374375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1,0.010441599786281586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1,0.011236800253391266
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1,0.010398399829864503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1,0.01244800016283989
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1,0.010860799998044967
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1,0.010419200360774993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1,0.011615999788045884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1,0.010393600165843963
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,3,0.014519999921321868
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,3,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,3,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,3,0.01072480008006096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,3,0.012548799812793731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,3,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,3,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,3,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,3,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,3,0.011531200259923935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,3,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,3,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,3,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,7,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,7,0.010894399881362916
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,3,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,7,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,7,0.014472000300884247
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,7,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,7,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,7,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,7,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,7,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,7,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,7,0.010470400005578995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,7,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,7,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,7,0.010492800176143647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,15,0.014619199931621552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,15,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,15,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,15,0.010608000308275222
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,15,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,15,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,15,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,15,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,15,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,15,0.010494399815797806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,15,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,15,0.010443200170993806
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,15,0.010454399883747101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,15,0.010473600029945374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,31,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,31,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,31,0.011928000301122666
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,31,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,31,0.010428799688816071
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,31,0.010497599840164185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,31,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,31,0.012590399384498597
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,31,0.010956799983978272
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,31,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,31,0.010491199791431427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,31,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,31,0.010420800000429154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,31,0.010380800068378448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,63,0.014534400403499603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,63,0.01247360035777092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,63,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,63,0.010478399693965912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,63,0.012987199425697326
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,63,0.010438399761915207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,63,0.010475199669599533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,63,0.012539200484752655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,63,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,63,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,63,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,63,0.010424000024795533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,63,0.011076799780130386
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,127,0.010819199681282043
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,127,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,127,0.010558400303125381
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,127,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,63,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,127,0.012803199887275695
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,127,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,127,0.012582400441169738
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,127,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,127,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,127,0.010886400192975997
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,127,0.01045759990811348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,127,0.01040479987859726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,127,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,255,0.014590400457382201
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,127,0.010411199927330018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,255,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,255,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,255,0.012214399874210358
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,255,0.01255359947681427
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,255,0.01080320030450821
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,255,0.011281599849462509
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,255,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,255,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,255,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,255,0.010388799756765366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,255,0.010422399640083313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,255,0.0104032002389431
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,255,0.012486399710178375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,511,0.018555200099945067
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,511,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,511,0.012860800325870513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,511,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,511,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,511,0.015488000214099884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,511,0.012691199779510498
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,511,0.012468799948692322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,511,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,511,0.011531200259923935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,511,0.01451680064201355
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,511,0.012692800164222718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,511,0.010516799986362457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1023,0.02064319998025894
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1023,0.016542400419712066
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1023,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,511,0.011006399989128113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1023,0.024905599653720856
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1023,0.01563519984483719
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1023,0.01462240070104599
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1023,0.01552799940109253
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1023,0.0187376007437706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1023,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1023,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1023,0.012443199753761292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1023,0.014057600498199463
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,2047,0.02682720124721527
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,2047,0.022233599424362184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1023,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1023,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,2047,0.020614400506019592
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,2047,0.01849440038204193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,2047,0.017217600345611574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,2047,0.016663999855518342
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,2047,0.016756799817085267
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,2047,0.019083200395107268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,2047,0.014537599682807923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,2047,0.014535999298095703
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,2047,0.01659359931945801
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,2047,0.02069920003414154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,2047,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,2047,0.014571200311183929
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,4095,0.0287200003862381
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,4095,0.02887359857559204
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,4095,0.03285279870033264
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,4095,0.02073120027780533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,4095,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,4095,0.02367199957370758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,4095,0.020694400370121
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,4095,0.016575999557971954
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,4095,0.02274399995803833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,4095,0.020750400424003602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,4095,0.020739200711250304
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,4095,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,4095,0.016543999314308167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,4095,0.01637600064277649
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,8191,0.04951840043067932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,8191,0.04745120108127594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,8191,0.04447839856147766
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,8191,0.043372800946235655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,8191,0.05355679988861084
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,8191,0.04360480010509491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,8191,0.03302719891071319
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,8191,0.029049599170684816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,8191,0.044152000546455385
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,8191,0.021227200329303742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,8191,0.022409600019454957
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,8191,0.022776000201702118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,8191,0.02155199944972992
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,8191,0.030423998832702637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,16383,0.07614719867706299
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,16383,0.07594879865646362
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,16383,0.07368639707565308
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,16383,0.07162079811096192
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,16383,0.07120000123977661
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,16383,0.07117760181427002
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,16383,0.04523519873619079
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,16383,0.049932798743247984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,16383,0.09353439807891846
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,16383,0.04960319995880127
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,16383,0.04029279947280884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,16383,0.03915840089321136
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,32767,0.13223520517349244
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,32767,0.13664640188217164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,16383,0.0383215993642807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,16383,0.03907040059566498
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,32767,0.1284160017967224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,32767,0.12438080310821534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,32767,0.17120800018310547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,32767,0.12592480182647706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,32767,0.12745440006256104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,32767,0.08321599960327149
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,32767,0.06779999732971191
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,32767,0.08494399785995484
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,32767,0.0616752028465271
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,32767,0.05960639715194702
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,65535,0.23934559822082518
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,32767,0.061635202169418334
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,65535,0.2582000017166138
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,65535,0.3265520095825195
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,65535,0.22607839107513428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,32767,0.06066399812698364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,65535,0.22916479110717775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,65535,0.22732160091400147
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,65535,0.22426559925079345
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,65535,0.11618080139160156
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,65535,0.15546879768371583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,65535,0.10578240156173706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,65535,0.1557360053062439
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,65535,0.10395840406417847
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,65535,0.10514719486236572
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,131071,0.5085279941558838
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,65535,0.10452159643173217
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,131071,0.6354432106018066
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,131071,0.42220320701599123
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,131071,0.4282224178314209
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,131071,0.4544271945953369
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,131071,0.42702398300170896
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,131071,0.2995120048522949
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,131071,0.423144006729126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,131071,0.2943648099899292
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,131071,0.19088000059127808
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,131071,0.21959519386291504
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,131071,0.189520001411438
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,131071,0.18869279623031615
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,131071,0.1888975977897644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1,0.014108799397945404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1,0.012464000284671784
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1,0.012454400211572647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1,0.010552000254392624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1,0.010902400314807891
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,3,0.014183999598026275
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1,0.010603199899196624
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,3,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,3,0.014640000462532044
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,3,0.012438400089740754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,3,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,3,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,3,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,3,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,3,0.013152000308036805
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,3,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,3,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,3,0.010622400045394897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,3,0.010515200346708298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,3,0.012505599856376648
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,7,0.014484800398349762
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,7,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,7,0.012430399656295776
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,7,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,7,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,7,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,7,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,7,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,7,0.011113599687814713
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,7,0.010447999835014344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,7,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,7,0.010463999956846238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,7,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,15,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,15,0.012532800436019897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,15,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,15,0.012452799826860428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,15,0.012481600046157837
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,7,0.014435200393199921
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,15,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,15,0.0144896000623703
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,15,0.012571200728416443
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,15,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,15,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,15,0.01242239996790886
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,31,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,31,0.01449279934167862
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,15,0.010427200049161912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,31,0.012583999335765839
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,31,0.012582400441169738
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,15,0.010523200035095215
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,15,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,31,0.012494400143623352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,31,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,31,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,31,0.012559999525547028
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,31,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,31,0.010564800351858139
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,31,0.010407999902963639
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,31,0.01451520025730133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,63,0.014519999921321868
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,63,0.014507199823856353
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,31,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,31,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,63,0.012619200348854064
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,63,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,63,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,63,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,63,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,63,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,63,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,63,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,63,0.01048159971833229
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,63,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,127,0.013353599607944489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,63,0.010480000078678131
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,63,0.010502400249242783
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,127,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,127,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,127,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,127,0.012428800016641617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,127,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,127,0.0124719999730587
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,127,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,127,0.011631999909877778
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,127,0.010401599854230881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,127,0.012590399384498597
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,127,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,255,0.01326719969511032
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,255,0.014476799964904785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,127,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,255,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,255,0.014561599493026734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,255,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,127,0.010460799932479859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,255,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,255,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,255,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,255,0.012435200065374375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,255,0.010451199859380722
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,255,0.010583999752998351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,255,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,255,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,511,0.020678399503231047
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,511,0.018603199720382692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,255,0.010377600044012069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,511,0.01658719927072525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,511,0.015035200119018554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,511,0.014563199877738953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,511,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,511,0.014574399590492249
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,511,0.014548799395561219
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,511,0.014529600739479065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,511,0.012508800625801087
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,511,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,511,0.012449599802494049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,511,0.013184000551700593
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1023,0.025022399425506592
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,511,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1023,0.022737599909305573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1023,0.02059520035982132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1023,0.01655520051717758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1023,0.01664319932460785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1023,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1023,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1023,0.01868479996919632
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1023,0.01663520038127899
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1023,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1023,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1023,0.014531199634075165
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1023,0.014281600713729858
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,2047,0.031784000992774966
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1023,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,2047,0.027000001072883605
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,2047,0.02066880017518997
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,2047,0.02066880017518997
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,2047,0.020695999264717102
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,2047,0.024809600412845613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,2047,0.03270559906959534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,2047,0.02073120027780533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,2047,0.020742399990558623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,2047,0.016569599509239197
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,2047,0.02069759964942932
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,2047,0.014612799882888794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,2047,0.015056000649929046
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,2047,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,4095,0.04952319860458374
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,4095,0.042499199509620667
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,4095,0.0494159996509552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,4095,0.04121119976043701
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,4095,0.035017600655555724
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,4095,0.04212960004806519
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,4095,0.041275200247764585
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,4095,0.05482879877090454
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,4095,0.031012800335884095
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,4095,0.02687999904155731
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,4095,0.019993600249290467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,4095,0.01879359930753708
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,8191,0.07810400128364563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,4095,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,4095,0.02070080041885376
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,8191,0.09399679899215699
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,8191,0.06966559886932373
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,8191,0.0759440004825592
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,8191,0.06858239769935608
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,8191,0.06864159703254699
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,8191,0.051267200708389284
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,8191,0.04525119960308075
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,8191,0.05094079971313477
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,8191,0.037622401118278505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,8191,0.03604480028152466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,8191,0.03657119870185852
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,8191,0.07034879922866821
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,16383,0.13999999761581422
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,16383,0.1293280005455017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,8191,0.037088000774383546
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,16383,0.17134560346603395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,16383,0.12474240064620971
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,16383,0.1215008020401001
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,16383,0.12131999731063843
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,16383,0.08475840091705322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,16383,0.060356801748275755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,16383,0.06996639966964721
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,16383,0.12036319971084594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,16383,0.05985919833183288
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,16383,0.05963360071182251
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,16383,0.08585919737815857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,16383,0.059552001953125
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,32767,0.26480000019073485
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,32767,0.32636640071868894
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,32767,0.23762240409851074
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,32767,0.22785439491271972
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,32767,0.2287071943283081
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,32767,0.22855839729309083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,32767,0.12239359617233277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,32767,0.15654239654541016
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,32767,0.22975680828094483
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,32767,0.10589280128479003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,32767,0.10475679636001586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,32767,0.10517599582672119
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,32767,0.15521119832992553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,65535,0.5142784118652344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,65535,0.4675295829772949
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,32767,0.1046671986579895
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,65535,0.44106078147888184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,65535,0.4379263877868652
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,65535,0.44043359756469724
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,65535,0.30076160430908205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,65535,0.6375232219696045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,65535,0.43898558616638184
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,65535,0.19493759870529176
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,65535,0.19547679424285888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,65535,0.2298271894454956
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,65535,0.19506399631500243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,65535,0.19396959543228148
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,65535,0.2985343933105469
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1,0.014739200472831726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1,0.013463999330997466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1,0.012467200309038163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1,0.014616000652313232
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1,0.012495999783277511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1,0.011635199934244157
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1,0.01255200058221817
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1,0.011483199894428253
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1,0.012510399520397186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,3,0.01611039936542511
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,3,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,3,0.012540799379348756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,3,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,3,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,3,0.01242400035262108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,3,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,3,0.0165583997964859
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,3,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,3,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,3,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,3,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,3,0.010503999888896942
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,3,0.010499200224876404
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,7,0.012564800679683685
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,7,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,7,0.012544000148773193
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,7,0.014475199580192565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,7,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,7,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,7,0.012462399899959564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,7,0.01244639977812767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,7,0.012451200187206269
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,7,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,7,0.010462400317192078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,7,0.010444799810647965
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,7,0.014580799639225006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,15,0.014727999269962311
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,15,0.013646399974822998
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,15,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,7,0.010486400127410889
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,15,0.012483199685811996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,15,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,15,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,15,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,15,0.012478400021791458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,15,0.012492799758911132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,15,0.012520000338554382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,15,0.010471999645233154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,31,0.014912000298500061
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,15,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,15,0.010566399991512298
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,15,0.010505600273609162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,31,0.014468799531459808
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,31,0.012600000202655792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,31,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,31,0.01249760016798973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,31,0.012555199861526489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,31,0.012511999905109405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,31,0.014718399941921234
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,31,0.012542399764060973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,31,0.012503999471664428
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,31,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,31,0.010520000010728836
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,31,0.010489600151777268
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,31,0.01040000021457672
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,63,0.014539200067520141
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,63,0.01632480025291443
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,63,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,63,0.012559999525547028
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,63,0.014510400593280792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,63,0.016433599591255187
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,63,0.012601600587368011
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,63,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,63,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,63,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,63,0.010931199789047242
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,63,0.012537600100040435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,63,0.010467199981212616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,63,0.010532800108194351
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,127,0.014558400213718414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,127,0.014552000164985656
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,127,0.012470400333404541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,127,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,127,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,127,0.012441600114107132
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,127,0.012529599666595458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,127,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,127,0.012516799569129943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,127,0.0147024005651474
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,127,0.010449600219726563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,127,0.010487999767065048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,127,0.011104000359773636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,127,0.012521600723266602
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,255,0.016655999422073364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,255,0.014947199821472168
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,255,0.014587199687957764
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,255,0.012580800056457519
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,255,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,255,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,255,0.012567999958992004
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,255,0.012563200294971466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,255,0.016543999314308167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,255,0.012491200119256973
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,255,0.010500799864530563
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,255,0.014459200203418732
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,255,0.01048320010304451
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,511,0.022731199860572815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,255,0.010436800122261048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,511,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,511,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,511,0.020708799362182617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,511,0.015769599378108977
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,511,0.016510400176048278
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,511,0.018540799617767334
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,511,0.0186256006360054
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,511,0.014521600306034088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,511,0.012600000202655792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,511,0.01247519999742508
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,511,0.012566399574279786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,511,0.019012799859046935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,511,0.012531200051307678
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1023,0.028886398673057555
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1023,0.028166401386260986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1023,0.030902400612831116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1023,0.02051520049571991
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1023,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1023,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1023,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1023,0.022753599286079406
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1023,0.0186271995306015
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1023,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1023,0.014582400023937226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1023,0.01454399973154068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1023,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1023,0.014614400267601014
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,2047,0.047156798839569095
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,2047,0.043433600664138795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,2047,0.04153760075569153
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,2047,0.042735999822616576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,2047,0.05133280158042908
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,2047,0.03236800134181976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,2047,0.053497600555419925
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,2047,0.04304159879684448
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,2047,0.02892960011959076
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,2047,0.020729599893093108
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,2047,0.018603199720382692
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,2047,0.028889599442481994
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,2047,0.018671999871730804
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,2047,0.018719999492168425
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,4095,0.07473919987678528
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,4095,0.09148799777030944
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,4095,0.06967999935150146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,4095,0.07146080136299134
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,4095,0.07807040214538574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,4095,0.06895999908447266
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,4095,0.04930399954319
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,4095,0.03917759954929352
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,4095,0.04819999933242798
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,4095,0.04724160134792328
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,4095,0.03709439933300018
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,4095,0.06949599981307983
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,4095,0.036399999260902406
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,8191,0.1361631989479065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,8191,0.13231840133666992
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,8191,0.16864800453186035
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,8191,0.12436000108718873
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,4095,0.03728480041027069
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,8191,0.12281919717788696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,8191,0.08112000226974488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,8191,0.121670401096344
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,8191,0.12198560237884522
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,8191,0.0720192015171051
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,8191,0.08414559960365295
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,8191,0.06176480054855347
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,8191,0.05964159965515137
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,8191,0.0596448004245758
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,16383,0.26282238960266113
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,8191,0.05996000170707703
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,16383,0.22761120796203613
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,16383,0.23110721111297608
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,16383,0.22729599475860596
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,16383,0.32222559452056887
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,16383,0.22703680992126465
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,16383,0.15277279615402223
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,16383,0.2396575927734375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,16383,0.15352799892425537
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,16383,0.10653599500656127
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,16383,0.10711679458618165
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,16383,0.10546079874038697
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,16383,0.10475200414657593
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,16383,0.12400640249252319
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,32767,0.5152383804321289
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,32767,0.43888320922851565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,32767,0.6283631801605225
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,32767,0.44049921035766604
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,32767,0.4396512031555176
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,32767,0.4638224124908447
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,32767,0.4389376163482666
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,32767,0.2318351984024048
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,32767,0.294270396232605
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,32767,0.19611680507659912
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,32767,0.292411208152771
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,32767,0.19637759923934936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1,0.021876800060272216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,32767,0.19496959447860718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,32767,0.19419840574264527
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1,0.014483200013637542
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1,0.016571199893951415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1,0.01433439999818802
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1,0.013012799620628356
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1,0.02282879948616028
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1,0.017185600101947786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1,0.012535999715328216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1,0.012513600289821625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1,0.012476799637079239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1,0.015622399747371674
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1,0.012488000094890594
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1,0.012291199713945388
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,3,0.022404800355434417
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,3,0.01656319946050644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,3,0.014475199580192565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,3,0.02029920071363449
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,3,0.01451359987258911
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,3,0.013784000277519226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,3,0.017697599530220032
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,3,0.022720000147819518
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,3,0.01611199975013733
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,3,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,3,0.012545600533485413
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,3,0.01252480000257492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,7,0.02154559940099716
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,3,0.01165440008044243
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,7,0.020720000565052032
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,3,0.012432000041007996
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,7,0.016607999801635742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,7,0.01459999978542328
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,7,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,7,0.014127999544143677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,7,0.014388799667358398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,7,0.016568000614643096
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,7,0.012460800260305405
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,7,0.02269120067358017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,7,0.012417600303888322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,7,0.01677920073270798
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,7,0.01250240057706833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,7,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,15,0.020868800580501556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,15,0.016707199811935424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,15,0.020659199357032774
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,15,0.01449120044708252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,15,0.014585599303245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,15,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,15,0.014556799829006196
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,15,0.022707200050354003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,15,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,15,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,15,0.012465599924325943
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,15,0.012479999661445617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,15,0.018617600202560425
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,15,0.01241919994354248
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,31,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,31,0.016598400473594666
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,31,0.0222463995218277
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,31,0.01462399959564209
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,31,0.014502400159835815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,31,0.013803200423717498
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,31,0.022742399573326112
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,31,0.01685599982738495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,31,0.012507200241088867
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,31,0.014542399346828461
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,31,0.012436799705028534
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,31,0.016331200301647187
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,31,0.012515200674533844
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,31,0.011023999750614166
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,63,0.022152000665664674
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,63,0.016652800142765045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,63,0.020763200521469117
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,63,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,63,0.016604800522327424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,63,0.022771200537681578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,63,0.01456640064716339
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,63,0.014455999433994293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,63,0.014553600549697876
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,63,0.016950400173664094
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,63,0.012558400630950928
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,63,0.012459199875593185
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,63,0.01252799928188324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,63,0.012439999729394913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,127,0.01459839940071106
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,127,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,127,0.014601600170135499
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,127,0.020710399746894835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,127,0.021031999588012697
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,127,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,127,0.016633599996566772
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,127,0.012561599910259246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,127,0.012425599992275238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,127,0.01736319959163666
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,127,0.012484800070524216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,127,0.0124208003282547
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,127,0.022623999416828154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,255,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,255,0.01866080015897751
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,255,0.02074880003929138
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,255,0.014496000111103058
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,255,0.014499199390411378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,255,0.014547200500965118
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,127,0.014569599926471711
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,255,0.024612799286842346
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,255,0.016700799763202667
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,255,0.016603200137615202
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,255,0.012457600235939026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,255,0.01249919980764389
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,255,0.012455999851226807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,255,0.014494399726390838
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,511,0.03704639971256256
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,511,0.026830399036407472
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,511,0.020628799498081208
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,511,0.03198559880256653
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,255,0.01252640038728714
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,511,0.018585599958896637
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,511,0.018564799427986146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,511,0.028828799724578857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,511,0.02072640061378479
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,511,0.021377600729465485
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,511,0.018372799456119537
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,511,0.014608000218868256
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,511,0.014283199608325959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,511,0.014481599628925323
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1023,0.052985602617263795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1023,0.04904640018939972
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,511,0.014579200744628906
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1023,0.05516800284385681
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1023,0.04129120111465454
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1023,0.04049600064754486
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1023,0.040966400504112245
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1023,0.03912320137023926
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1023,0.04242079854011536
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1023,0.026921600103378296
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1023,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1023,0.018643200397491455
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1023,0.017211200296878816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,2047,0.08247039914131164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1023,0.018563200533390046
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1023,0.03213759958744049
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,2047,0.0688975989818573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,2047,0.09468960165977477
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,2047,0.06777600049972535
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,2047,0.06679840087890625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,2047,0.06698399782180786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,2047,0.07613279819488525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,2047,0.05541120171546936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,2047,0.05140159726142883
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,2047,0.03706879913806915
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,2047,0.03546560108661652
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,2047,0.04557439982891083
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,2047,0.035504001379013064
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,2047,0.03570080101490021
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,4095,0.17311040163040162
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,4095,0.1285215973854065
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,4095,0.121452796459198
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,4095,0.14311039447784424
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,4095,0.12096799612045288
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,4095,0.11951680183410644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,4095,0.11898399591445923
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,4095,0.08781120181083679
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,4095,0.08655679821968079
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,4095,0.07027680277824402
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,4095,0.05894399881362915
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,4095,0.05881119966506958
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,4095,0.05873119831085205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,8191,0.2699984073638916
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,4095,0.06068800091743469
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,8191,0.23597440719604493
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,8191,0.3250112056732178
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,8191,0.22722239494323732
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,8191,0.2260432004928589
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,8191,0.15974719524383546
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,8191,0.2266688108444214
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,8191,0.15745439529418945
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,8191,0.10581120252609252
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,8191,0.12168159484863281
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,8191,0.10450079441070556
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,8191,0.1045151948928833
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,8191,0.2258847951889038
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,8191,0.1039728045463562
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,16383,0.4604288101196289
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,16383,0.5150288105010986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,16383,0.6335135936737061
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,16383,0.4340640068054199
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,16383,0.4358831882476807
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,16383,0.43590078353881834
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,16383,0.22889599800109864
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,16383,0.29913759231567383
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,16383,0.19529919624328612
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,16383,0.3057744026184082
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,16383,0.43443999290466306
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,16383,0.1940816044807434
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,16383,0.1935696005821228
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,16383,0.1940559983253479
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1,0.039027199149131775
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1,0.024835200607776643
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1,0.031876799464225766
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1,0.019211199879646302
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1,0.020656000077724456
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1,0.023177599906921385
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1,0.018681600689888
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1,0.025920000672340394
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1,0.01648319959640503
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1,0.03691520094871521
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1,0.016443200409412384
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1,0.018569600582122803
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1,0.016628800332546233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,3,0.039017599821090695
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,3,0.019088000059127808
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,3,0.020457600057125092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,3,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,3,0.030932798981666565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,3,0.018783999979496
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,3,0.024723200500011443
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,3,0.022777600586414336
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,3,0.026819199323654175
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,3,0.016638399660587312
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,3,0.03705280125141144
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,3,0.016590400040149687
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,3,0.015161600708961488
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,3,0.015563200414180755
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,7,0.032743999361991884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,7,0.024784000217914583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,7,0.020623999834060668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,7,0.03904159963130951
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,7,0.019043199717998505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,7,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,7,0.020694400370121
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,7,0.023022399842739107
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,7,0.017254400253295898
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,7,0.026844799518585205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,7,0.03686400055885315
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,7,0.01652960032224655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,7,0.015273599326610566
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,7,0.016278399527072905
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,15,0.024784000217914583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,15,0.03231039941310883
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,15,0.03916319906711578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,15,0.02070239931344986
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,15,0.020209600031375886
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,15,0.019382399320602418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,15,0.022779199481010436
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,15,0.02682879865169525
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,15,0.0370608001947403
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,15,0.016451199352741242
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,15,0.018662400543689728
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,15,0.019337600469589232
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,31,0.03914079964160919
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,15,0.015804800391197204
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,31,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,15,0.0157600000500679
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,31,0.020718400180339814
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,31,0.020712000131607056
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,31,0.020623999834060668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,31,0.03277119994163513
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,31,0.037067198753356935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,31,0.023201599717140198
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,31,0.01990240067243576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,31,0.026881599426269533
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,31,0.016648000478744505
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,31,0.016624000668525696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,31,0.01865279972553253
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,63,0.039324799180030824
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,31,0.016579200327396394
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,63,0.0206496000289917
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,63,0.020871999859809875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,63,0.01969120055437088
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,63,0.024772800505161285
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,63,0.03708159923553467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,63,0.02056480050086975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,63,0.03291999995708465
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,63,0.026926401257514953
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,63,0.018454399704933167
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,63,0.016630400717258454
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,63,0.02298240065574646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,63,0.014884799718856812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,63,0.016139200329780577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,127,0.039233601093292235
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,127,0.03302719891071319
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,127,0.020686399936676026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,127,0.026628801226615907
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,127,0.01887039989233017
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,127,0.018676799535751343
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,127,0.019064000248908995
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,127,0.022998400032520294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,127,0.037124800682067874
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,127,0.02685759961605072
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,127,0.018654400110244752
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,127,0.014584000408649444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,127,0.01531040072441101
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,127,0.016625599563121797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,255,0.02150080054998398
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,255,0.03737919926643372
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,255,0.04746400117874146
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,255,0.019307200610637665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,255,0.020691199600696562
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,255,0.019334399700164796
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,255,0.03715200126171112
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,255,0.026927998661994933
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,255,0.016523200273513793
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,255,0.016616000235080718
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,255,0.01643519997596741
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,255,0.026825600862503053
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,255,0.01724800020456314
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,511,0.06322720050811767
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,511,0.0536191999912262
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,255,0.0429280012845993
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,511,0.05972639918327331
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,511,0.04329920113086701
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,511,0.04299040138721466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,511,0.04129279851913452
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,511,0.05142239928245544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,511,0.038915199041366574
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,511,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,511,0.020664000511169435
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,511,0.020608000457286835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,511,0.04110080003738403
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,511,0.020716799795627593
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1023,0.08293759822845459
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1023,0.09240480065345764
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,511,0.03712320029735565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1023,0.09834880232810975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1023,0.07017279863357544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1023,0.06637279987335205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1023,0.06722720265388489
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1023,0.052724802494049074
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1023,0.039985600113868716
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1023,0.06881440281867982
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1023,0.037115201354026794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1023,0.056446397304534913
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1023,0.06871039867401123
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1023,0.03725599944591522
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1023,0.037115201354026794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,2047,0.15483200550079346
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,2047,0.13760160207748412
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,2047,0.12234079837799072
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,2047,0.11998239755630494
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,2047,0.09953439831733704
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,2047,0.17643359899520875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,2047,0.120414400100708
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,2047,0.08010879755020142
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,2047,0.09239680171012879
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,2047,0.1196943998336792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,2047,0.06394240260124207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,2047,0.061627197265625
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,2047,0.061238402128219606
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,4095,0.2448960065841675
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,4095,0.2833199977874756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,4095,0.2239135980606079
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,4095,0.22238240242004395
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,2047,0.059854400157928464
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,4095,0.2263551950454712
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,4095,0.33019199371337893
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,4095,0.22222559452056884
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,4095,0.16803200244903566
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,4095,0.16360960006713868
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,4095,0.13201919794082642
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,4095,0.10686559677124023
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,8191,0.5249584197998047
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,4095,0.10774240493774415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,4095,0.1091871976852417
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,4095,0.10723999738693238
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,8191,0.46777119636535647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,8191,0.6318624019622803
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,8191,0.4318543910980225
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,8191,0.4304384231567383
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,8191,0.20146560668945312
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,8191,0.30305440425872804
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,8191,0.23884000778198242
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,8191,0.4308495998382568
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,8191,0.4352719783782959
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,8191,0.3136320114135742
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,8191,0.1992640018463135
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,8191,0.19641920328140258
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1,0.06629279851913453
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,8191,0.19746400117874147
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1,0.04519839882850647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1,0.053283202648162845
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1,0.03060320019721985
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1,0.029447999596595765
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1,0.06625440120697021
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1,0.039110401272773744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1,0.028881600499153136
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1,0.04400160014629364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1,0.03288480043411255
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1,0.026739200949668883
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1,0.024715200066566467
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1,0.02279680073261261
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1,0.022993600368499754
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,3,0.06612319946289062
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,3,0.053390401601791385
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,3,0.0289247989654541
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,3,0.03088639974594116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,3,0.045193600654602054
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,3,0.032969599962234496
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,3,0.02956640124320984
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,3,0.06647040247917176
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,3,0.0390608012676239
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,3,0.04371840059757233
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,3,0.024780799448490144
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,3,0.026771199703216553
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,7,0.06635199785232544
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,7,0.04534080028533936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,7,0.032969599962234496
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,7,0.05331680178642273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,3,0.022889600694179536
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,3,0.023080000281333925
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,7,0.030895999073982237
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,7,0.029131200909614564
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,7,0.030931198596954347
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,7,0.03915359973907471
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,7,0.06646559834480285
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,7,0.02279520034790039
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,7,0.044223999977111815
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,7,0.024718399345874786
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,7,0.022745600342750548
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,15,0.06690239906311035
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,15,0.045228800177574156
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,15,0.03291040062904358
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,15,0.028935998678207397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,15,0.053844797611236575
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,7,0.02688480019569397
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,15,0.06722880005836487
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,15,0.039262399077415466
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,15,0.02971999943256378
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,15,0.030852800607681273
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,15,0.026791998744010927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,15,0.02481440007686615
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,31,0.06710399985313416
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,15,0.044924798607826236
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,31,0.03265439867973328
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,15,0.02279839962720871
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,31,0.04618239998817444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,31,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,31,0.05358880162239075
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,15,0.022752000391483305
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,31,0.030137598514556885
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,31,0.030907198786735535
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,31,0.06656960248947144
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,31,0.027444800734519957
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,31,0.02280319929122925
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,31,0.04354560077190399
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,31,0.03936800062656402
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,31,0.024702399969100952
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,31,0.02314079999923706
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,63,0.0330159991979599
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,63,0.04729120135307312
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,63,0.05551360249519348
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,63,0.06724479794502258
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,63,0.029787200689315795
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,63,0.06787359714508057
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,63,0.04121760129928589
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,63,0.03094080090522766
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,63,0.029598399996757507
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,63,0.026939201354980468
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,63,0.04471679925918579
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,63,0.0227743998169899
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,63,0.023632000386714935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,63,0.023315200209617616
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,127,0.058083200454711915
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,127,0.06997600197792053
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,127,0.0350928008556366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,127,0.04928799867630005
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,127,0.03056640028953552
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,127,0.030964800715446474
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,127,0.030873599648475646
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,127,0.06772639751434326
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,127,0.043249601125717164
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,127,0.026811200380325317
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,127,0.02279199957847595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,127,0.02306240051984787
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,127,0.04562079906463623
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,255,0.08427039980888366
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,127,0.022763200104236603
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,255,0.06577759981155396
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,255,0.04314880073070526
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,255,0.047259199619293216
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,255,0.0423119992017746
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,255,0.07366880178451538
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,255,0.04131839871406555
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,255,0.05140479803085327
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,255,0.06236159801483154
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,255,0.04729759991168976
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,255,0.022787199914455415
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,255,0.02285120040178299
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,255,0.023820799589157105
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,255,0.02882719933986664
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,511,0.11054879426956177
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,511,0.07622399926185608
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,511,0.07263039946556091
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,511,0.09483199715614318
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,511,0.06961119771003724
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,511,0.07031679749488831
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,511,0.09954079985618591
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,511,0.08639360070228577
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,511,0.04373280107975006
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,511,0.06186559796333313
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,511,0.03909119963645935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,511,0.03909600079059601
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,511,0.04079039990901947
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,511,0.06553279757499694
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1023,0.17259999513626098
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1023,0.12262719869613647
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1023,0.12135039567947388
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1023,0.14979840517044068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1023,0.17004159688949586
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1023,0.12812960147857666
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1023,0.12114720344543457
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1023,0.11439679861068726
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1023,0.09514080286026001
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1023,0.06781759858131409
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1023,0.06364480257034302
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1023,0.06290240287780761
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1023,0.061689597368240354
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1023,0.09124640226364136
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,2047,0.2918879985809326
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,2047,0.23042399883270265
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,2047,0.22659680843353272
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,2047,0.3112560033798218
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,2047,0.2615999937057495
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,2047,0.22366719245910643
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,2047,0.22478721141815186
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,2047,0.18524320125579835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,2047,0.11092480421066284
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,2047,0.16085120439529418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,2047,0.14430559873580934
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,2047,0.10846240520477295
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,2047,0.10968480110168458
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,2047,0.11539360284805297
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,4095,0.5249536037445068
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,4095,0.585038423538208
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,4095,0.43930559158325194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,4095,0.43088159561157224
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,4095,0.4326784133911133
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,4095,0.48914399147033694
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,4095,0.4310863971710205
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,4095,0.2894752025604248
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,4095,0.25278239250183104
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,4095,0.3200160026550293
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,4095,0.208076810836792
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,4095,0.2032464027404785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,4095,0.20068960189819335
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1,0.07825599908828736
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,4095,0.2000432014465332
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1,0.049809598922729494
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1,0.054414397478103636
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1,0.12044479846954345
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1,0.09969760179519653
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1,0.04919199943542481
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1,0.12231680154800414
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1,0.04548319876194
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1,0.0795408010482788
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1,0.04921280145645142
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1,0.03916319906711578
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1,0.03714880049228668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1,0.07311999797821045
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1,0.03712640106678009
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,3,0.12173919677734375
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,3,0.07845759987831116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,3,0.10018719434738159
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,3,0.04930239915847778
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,3,0.04943839907646179
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,3,0.1231119990348816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,3,0.053799998760223386
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,3,0.04939680099487305
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,3,0.07232959866523743
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,3,0.0801584005355835
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,3,0.03707840144634247
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,3,0.039150398969650266
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,3,0.037360000610351565
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,3,0.04529919922351837
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,7,0.10050560235977173
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,7,0.1203376054763794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,7,0.05560960173606873
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,7,0.07842400074005126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,7,0.04996959865093231
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,7,0.049435201287269595
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,7,0.07278400063514709
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,7,0.04938560128211975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,7,0.04597119987010956
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,7,0.08019840121269226
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,7,0.12311040163040161
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,7,0.03776800036430359
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,7,0.037110400199890134
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,15,0.12095199823379517
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,15,0.07842400074005126
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,15,0.10082080364227294
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,15,0.04941120147705078
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,15,0.05515679717063904
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,7,0.03919520080089569
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,15,0.04981279969215393
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,15,0.04936000108718872
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,15,0.12315679788589477
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,15,0.04554080069065094
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,15,0.08071039915084839
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,15,0.07365120053291321
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,15,0.039110401272773744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,31,0.12044960260391235
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,15,0.03705280125141144
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,31,0.07843040227890015
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,31,0.10052160024642945
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,31,0.05590559840202332
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,15,0.03843519985675812
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,31,0.050049597024917604
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,31,0.049430400133132935
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,31,0.0738752007484436
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,31,0.049348801374435425
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,31,0.12308800220489502
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,31,0.08196960091590881
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,31,0.04546079933643341
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,63,0.12180800437927246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,31,0.03709760010242462
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,63,0.05801600217819214
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,31,0.03715679943561554
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,63,0.09913439750671386
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,31,0.03919520080089569
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,63,0.07954400181770324
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,63,0.05209280252456665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,63,0.04965119957923889
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,63,0.12313599586486816
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,63,0.04939680099487305
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,63,0.08217120170593262
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,63,0.03731519877910614
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,63,0.039103999733924866
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,63,0.07390559911727905
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,63,0.03717280030250549
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,127,0.12458080053329468
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,63,0.0472351998090744
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,127,0.08873760104179382
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,127,0.10163520574569702
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,127,0.06352800130844116
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,127,0.05751039981842041
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,127,0.05502399802207947
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,127,0.05550079941749573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,127,0.12187039852142334
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,127,0.04948000013828278
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,127,0.08239200115203857
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,127,0.04126720130443573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,127,0.037143999338150026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,127,0.03757759928703308
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,255,0.11359679698944092
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,127,0.0746720016002655
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,255,0.1506880044937134
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,255,0.10980000495910644
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,255,0.0699887990951538
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,255,0.06865599751472473
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,255,0.07819039821624756
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,255,0.1299839973449707
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,255,0.06762080192565918
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,255,0.0872816026210785
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,255,0.08066880106925964
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,255,0.04244000017642975
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,255,0.051553601026535036
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,255,0.045105600357055665
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,511,0.2109328031539917
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,255,0.04138079881668091
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,511,0.12936960458755492
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,511,0.11839359998703003
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,511,0.11945439577102661
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,511,0.12414560317993165
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,511,0.18500319719314576
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,511,0.1604640007019043
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,511,0.1661695957183838
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,511,0.10954879522323609
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,511,0.0728767991065979
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,511,0.11032639741897583
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,511,0.06575679779052734
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,511,0.06372320055961608
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1023,0.33363680839538573
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1023,0.3256400108337402
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,511,0.06305119991302491
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1023,0.22560160160064696
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1023,0.2164463996887207
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1023,0.27192161083221433
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1023,0.21626880168914794
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1023,0.21284000873565673
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1023,0.15881600379943847
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1023,0.11731679439544677
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1023,0.22462079524993897
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1023,0.10998239517211914
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1023,0.10518560409545899
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1023,0.17595839500427246
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1023,0.10687999725341797
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,2047,0.5763855934143066
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,2047,0.6098383903503418
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,2047,0.48072161674499514
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,2047,0.4136688232421875
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,2047,0.4228816032409668
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,2047,0.40888638496398927
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,2047,0.40789117813110354
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,2047,0.3059808015823364
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,2047,0.197979199886322
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,2047,0.20675840377807617
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,2047,0.3687536001205444
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,2047,0.26865758895874026
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,2047,0.1950543999671936
SGLang,0.5.9,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,2047,0.19249600172042847
