framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1,0.011084800213575363
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,3,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,3,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1,0.008723200112581254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,3,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,3,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,3,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,3,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,3,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,3,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,3,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,3,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,3,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,3,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,3,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,7,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,7,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,7,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,3,0.009359999746084213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,7,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,7,0.009990400075912476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,7,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,7,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,7,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,7,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,7,0.008406399935483932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,7,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,7,0.008427199721336365
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,7,0.008446399867534638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,15,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,15,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,15,0.010567999631166457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,15,0.009359999746084213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,15,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,15,0.009702400118112565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,15,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,15,0.00968480035662651
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,15,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,15,0.008463999629020691
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,15,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,15,0.00846719965338707
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,15,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,31,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,31,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,15,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,31,0.009753599762916565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,31,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,31,0.009679999947547913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,31,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,31,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,31,0.00846560001373291
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,31,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,31,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,31,0.009567999839782714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,31,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,31,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,31,0.008433599770069123
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,63,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,63,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,63,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,63,0.010539200156927109
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,63,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,63,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,63,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,63,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,63,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,63,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,63,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,63,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,63,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,127,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,127,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,63,0.00846719965338707
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,127,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,127,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,127,0.009355200082063675
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,127,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,127,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,127,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,127,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,127,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,127,0.010080000013113022
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,127,0.008463999629020691
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,127,0.009492799639701843
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,255,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,255,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,127,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,255,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,255,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,255,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,255,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,255,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,255,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,255,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,255,0.008436799794435502
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,255,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,255,0.009567999839782714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,255,0.008763200044631958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,255,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,511,0.012969599664211273
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,511,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,511,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,511,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,511,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,511,0.011318399757146835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,511,0.01225920021533966
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,511,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,511,0.010838399827480315
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,511,0.010513599961996078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,511,0.011486399918794632
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,511,0.01056160032749176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,511,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,511,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1023,0.01533920019865036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1023,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1023,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1023,0.0127920001745224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1023,0.013483199477195739
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1023,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1023,0.014635199308395385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1023,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1023,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1023,0.012619200348854064
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1023,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1023,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1023,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1023,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,2047,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,2047,0.012673600018024445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,2047,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,2047,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,2047,0.013756799697875976
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,2047,0.01263359934091568
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,2047,0.013447999954223633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,2047,0.013441599905490875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,2047,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,2047,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,4095,0.016684800386428833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,2047,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,2047,0.012625600397586822
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,2047,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,2047,0.012563200294971466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,4095,0.016046400368213653
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,4095,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,4095,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,4095,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,4095,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,4095,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,4095,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,4095,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,4095,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,4095,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,4095,0.014636799693107605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,4095,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,8191,0.02354239970445633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,8191,0.018694399297237395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,4095,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,8191,0.01604959964752197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,8191,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,8191,0.016676799952983858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,8191,0.01868479996919632
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,8191,0.016655999422073364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,8191,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,8191,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,8191,0.016847999393939973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,8191,0.01446399986743927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,8191,0.01541920006275177
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,8191,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,8191,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,16383,0.02066880017518997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,16383,0.02690559923648834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,16383,0.01929599940776825
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,16383,0.01979680061340332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,16383,0.01897760033607483
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,16383,0.024820800125598907
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,16383,0.019284799695014954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,16383,0.018510399758815764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,16383,0.020662400126457214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,16383,0.020761600136756896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,16383,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,16383,0.018651199340820313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,32767,0.03078399896621704
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,16383,0.01839040070772171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,32767,0.028734400868415833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,32767,0.02688960134983063
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,16383,0.01717599928379059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,32767,0.040398401021957395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,32767,0.026895999908447266
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,32767,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,32767,0.023601600527763368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,32767,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,32767,0.02696320116519928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,32767,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,32767,0.027428799867630006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,32767,0.02165600061416626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,32767,0.022808000445365906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,32767,0.02280319929122925
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,65535,0.04063200056552887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,65535,0.035104000568389894
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,65535,0.043161600828170776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,65535,0.033555200695991515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,65535,0.03095200061798096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,65535,0.03505280017852783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,65535,0.0377263993024826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,65535,0.037302398681640626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,65535,0.028892800211906433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,65535,0.0288239985704422
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,65535,0.028841599822044373
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,65535,0.026899200677871705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,65535,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,65535,0.02693760097026825
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,131071,0.07139520049095154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,131071,0.05347359776496887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,131071,0.05531200170516968
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,131071,0.05479360222816467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,131071,0.053416001796722415
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,131071,0.05971519947052002
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,131071,0.055427199602127074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,131071,0.033327999711036685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,131071,0.041576001048088077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,131071,0.033004799485206605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,131071,0.032742398977279666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,131071,0.033076798915863036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,131071,0.04184640049934387
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1,0.01141439974308014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1,0.010577599704265594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,131071,0.03274079859256744
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1,0.010289599746465683
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1,0.009337600320577621
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1,0.010540799796581268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1,0.00902400016784668
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,3,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,3,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,3,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,3,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,3,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,3,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,3,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,3,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,3,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,3,0.009676799923181535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,3,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,3,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,7,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,3,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,7,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,7,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,3,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,7,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,7,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,7,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,7,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,7,0.01085119992494583
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,7,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,7,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,7,0.009969600290060044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,7,0.008472000062465668
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,15,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,15,0.010513599961996078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,15,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,15,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,15,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,15,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,15,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,15,0.010990399867296219
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,15,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,15,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,15,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,15,0.009859199821949004
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,31,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,15,0.00939520001411438
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,15,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,31,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,31,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,31,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,31,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,31,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,31,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,31,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,31,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,31,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,31,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,31,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,31,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,31,0.009008000046014786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,63,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,63,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,63,0.010351999849081039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,63,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,63,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,63,0.01061440035700798
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,63,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,63,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,63,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,63,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,63,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,63,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,63,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,127,0.010899200290441512
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,127,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,127,0.010780800133943558
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,127,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,63,0.008673600107431411
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,127,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,127,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,127,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,127,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,127,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,127,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,127,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,127,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,255,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,255,0.010974399745464325
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,127,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,255,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,127,0.008449599891901017
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,255,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,255,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,255,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,255,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,255,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,255,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,255,0.008823999762535095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,255,0.009391999989748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,255,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,511,0.01297920048236847
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,255,0.008393599838018417
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,255,0.009408000111579894
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,511,0.013153600692749023
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,511,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,511,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,511,0.013884800672531127
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,511,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,511,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,511,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,511,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,511,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,511,0.011979199945926666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,511,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,511,0.011374399811029435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1023,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1023,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1023,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,511,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1023,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1023,0.013652800023555756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1023,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1023,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1023,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1023,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1023,0.012595200538635254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1023,0.01085119992494583
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1023,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1023,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,2047,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1023,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,2047,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,2047,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,2047,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,2047,0.012590399384498597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,2047,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,2047,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,2047,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,2047,0.012404800206422806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,2047,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,2047,0.01247360035777092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,2047,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,2047,0.010806400328874588
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,2047,0.010623999685049058
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,4095,0.01860159933567047
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,4095,0.016734400391578676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,4095,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,4095,0.012555199861526489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,4095,0.012563200294971466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,4095,0.014864000678062438
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,4095,0.014108799397945404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,4095,0.014731200039386749
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,4095,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,4095,0.01257600039243698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,4095,0.01242239996790886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,4095,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,4095,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,4095,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,8191,0.019782400131225585
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,8191,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,8191,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,8191,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,8191,0.01685280054807663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,8191,0.018646399676799773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,8191,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,8191,0.016641600430011748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,8191,0.014603200554847717
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,8191,0.016646400094032288
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,8191,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,8191,0.015641599893569946
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,8191,0.015435199439525604
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,8191,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,16383,0.020868800580501556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,16383,0.024326400458812715
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,16383,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,16383,0.020790399610996248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,16383,0.03138880133628845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,16383,0.020768000185489653
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,16383,0.020670400559902193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,16383,0.01931679993867874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,16383,0.018580800294876097
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,16383,0.01865759938955307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,16383,0.01870719939470291
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,16383,0.02279520034790039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,32767,0.03729760050773621
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,16383,0.01825280040502548
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,32767,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,32767,0.037427198886871335
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,32767,0.02714880108833313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,32767,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,16383,0.017041599750518797
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,32767,0.024864000082015992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,32767,0.030980798602104186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,32767,0.02674719989299774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,32767,0.026814401149749756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,32767,0.023339200019836425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,32767,0.022788800299167633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,32767,0.022888000309467315
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,32767,0.02236640006303787
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,65535,0.05755680203437805
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,65535,0.057576000690460205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,65535,0.05183519721031189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,65535,0.054825598001480104
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,65535,0.05263360142707825
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,65535,0.05285919904708862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,32767,0.022793599963188173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,65535,0.05338720083236694
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,65535,0.03505440056324005
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,65535,0.03305439949035645
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,65535,0.028887999057769776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,65535,0.030177599191665648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,131071,0.08482080101966857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,131071,0.09211840033531189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,65535,0.030862399935722352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,65535,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,65535,0.029657599329948426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,131071,0.07855839729309082
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,131071,0.07683519721031189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,131071,0.07972959876060486
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,131071,0.07912799715995789
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,131071,0.07895200252532959
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,131071,0.04529919922351837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,131071,0.048449599742889406
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,131071,0.05339199900627136
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,131071,0.04657439887523651
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,131071,0.047121599316596985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,131071,0.04611999988555908
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,131071,0.052660799026489256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1,0.010763200372457505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1,0.010603199899196624
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,3,0.0110895998775959
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,3,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,3,0.011007999628782272
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,3,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,3,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,3,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,3,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,3,0.012355200201272964
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,3,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,3,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,3,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,3,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,3,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,3,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,7,0.011854399740695954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,7,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,7,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,7,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,7,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,7,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,7,0.011539199948310852
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,7,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,7,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,7,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,7,0.009364800155162811
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,7,0.01029760017991066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,7,0.009457600116729737
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,15,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,15,0.010540799796581268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,7,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,15,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,15,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,15,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,15,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,15,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,15,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,15,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,15,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,15,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,15,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,15,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,31,0.010678400099277497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,15,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,31,0.011208000034093857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,31,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,31,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,31,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,31,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,31,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,31,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,31,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,31,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,31,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,31,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,31,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,31,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,63,0.010604800283908844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,63,0.01079839989542961
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,63,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,63,0.010840000212192535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,63,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,63,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,63,0.01037919968366623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,63,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,63,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,63,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,63,0.009398400038480758
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,63,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,63,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,63,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,127,0.01071999967098236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,127,0.011564800143241882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,127,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,127,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,127,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,127,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,127,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,127,0.0094480000436306
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,127,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,127,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,127,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,127,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,255,0.011841599643230439
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,127,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,255,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,255,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,255,0.010684800148010255
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,255,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,255,0.01061599999666214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,255,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,255,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,255,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,127,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,255,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,255,0.009363199770450591
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,255,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,255,0.00936800017952919
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,255,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,511,0.014019200205802917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,511,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,511,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,511,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,511,0.010579200088977813
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,511,0.010543999820947647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,511,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,511,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,511,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,511,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,511,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,511,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,511,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,511,0.010598400235176086
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1023,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1023,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1023,0.012894399464130402
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1023,0.014633600413799287
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1023,0.01257600039243698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1023,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1023,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1023,0.012860800325870513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1023,0.012582400441169738
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1023,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1023,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1023,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1023,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1023,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,2047,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,2047,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,2047,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,2047,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,2047,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,2047,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,2047,0.012614400684833526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,2047,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,2047,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,2047,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,2047,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,2047,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,2047,0.012415999919176102
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,2047,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,4095,0.020740799605846405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,4095,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,4095,0.016523200273513793
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,4095,0.018598400056362152
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,4095,0.018632000684738158
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,4095,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,4095,0.016172799468040466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,4095,0.015812799334526062
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,4095,0.014640000462532044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,4095,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,4095,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,4095,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,4095,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,8191,0.018628799915313722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,4095,0.014531199634075165
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,8191,0.019123199582099914
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,8191,0.030742400884628297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,8191,0.02284960001707077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,8191,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,8191,0.018662400543689728
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,8191,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,8191,0.021294400095939636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,8191,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,8191,0.01624159961938858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,8191,0.017900800704956053
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,8191,0.016359999775886536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,8191,0.015132799744606018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,8191,0.0165120005607605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,16383,0.03300159871578216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,16383,0.0247296005487442
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,16383,0.030958399176597595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,16383,0.023998400568962096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,16383,0.022755199670791627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,16383,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,16383,0.021294400095939636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,16383,0.022728000581264497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,16383,0.020630399882793426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,16383,0.029931199550628663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,16383,0.018745599687099455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,16383,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,16383,0.018723200261592864
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,32767,0.05175359845161438
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,16383,0.018697600066661834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,32767,0.04907360076904297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,32767,0.05759040117263794
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,32767,0.049089598655700686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,32767,0.049028798937797546
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,32767,0.04754559993743897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,32767,0.026766398549079896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,32767,0.03128319978713989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,32767,0.024963200092315674
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,32767,0.02544800043106079
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,32767,0.03315199911594391
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,32767,0.047491198778152464
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,32767,0.024851199984550477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,65535,0.0735360026359558
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,65535,0.07879520058631898
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,32767,0.02480800002813339
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,65535,0.07254400253295898
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,65535,0.09499840140342712
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,65535,0.07264000177383423
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,65535,0.04127199947834015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,65535,0.05299680233001709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,65535,0.04739840030670166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,65535,0.07316319942474366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,65535,0.04253759980201721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,65535,0.0731552004814148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,65535,0.041540798544883725
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,65535,0.04142560064792633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,131071,0.13230079412460327
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,131071,0.16480159759521484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,65535,0.04294399917125702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,131071,0.12298719882965088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,131071,0.12452800273895263
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,131071,0.1250864028930664
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,131071,0.12400640249252319
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,131071,0.07193440198898315
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,131071,0.12367360591888428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,131071,0.06376000046730042
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,131071,0.06384320259094238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,131071,0.06462399959564209
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,131071,0.06375679969787598
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1,0.012585599720478059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,131071,0.08410239815711976
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,131071,0.06384639739990235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1,0.011929599940776825
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,3,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,3,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,3,0.010939200222492219
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,3,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,3,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,3,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,3,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,3,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,3,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,3,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,3,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,3,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,3,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,3,0.01000479981303215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,7,0.011417599767446518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,7,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,7,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,7,0.010980799794197083
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,7,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,7,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,7,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,7,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,7,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,7,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,7,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,7,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,15,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,15,0.01194560006260872
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,7,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,15,0.011428800225257874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,7,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,15,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,15,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,15,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,15,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,15,0.012579199671745301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,15,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,15,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,15,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,15,0.010027199983596802
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,31,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,15,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,15,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,31,0.010555200278759003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,31,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,31,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,31,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,31,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,31,0.01055999994277954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,31,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,31,0.010729599744081497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,31,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,31,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,31,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,31,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,31,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,63,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,63,0.012572799623012543
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,63,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,63,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,63,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,63,0.011193600296974183
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,63,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,63,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,63,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,63,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,63,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,63,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,63,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,127,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,127,0.011604800075292587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,127,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,63,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,127,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,127,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,127,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,127,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,127,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,127,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,127,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,127,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,127,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,255,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,127,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,255,0.011457599699497223
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,255,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,255,0.010704000294208527
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,255,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,127,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,255,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,255,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,255,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,255,0.011633600294589996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,255,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,255,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,255,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,511,0.014617599546909332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,511,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,255,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,511,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,511,0.013145600259304047
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,255,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,511,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,511,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,511,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,511,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,511,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,511,0.010847999900579452
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,511,0.0114656001329422
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,511,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,511,0.01064160019159317
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1023,0.016659200191497803
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,511,0.010694400221109391
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1023,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1023,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1023,0.013911999762058258
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1023,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1023,0.01388159990310669
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1023,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1023,0.012577599287033081
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1023,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1023,0.012590399384498597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1023,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1023,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1023,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1023,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,2047,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,2047,0.016683200001716615
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,2047,0.017667199671268462
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,2047,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,2047,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,2047,0.0162432000041008
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,2047,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,2047,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,2047,0.016575999557971954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,2047,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,2047,0.013441599905490875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,2047,0.013055999577045441
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,4095,0.02884959876537323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,2047,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,2047,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,4095,0.01801760047674179
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,4095,0.020713600516319274
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,4095,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,4095,0.018590399622917177
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,4095,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,4095,0.020638400316238405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,4095,0.017985600233078002
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,4095,0.01652639955282211
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,4095,0.014985600113868713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,4095,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,4095,0.014590400457382201
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,4095,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,4095,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,8191,0.03007520139217377
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,8191,0.022198399901390074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,8191,0.03319199979305267
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,8191,0.02235199958086014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,8191,0.020788800716400147
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,8191,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,8191,0.018620799481868743
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,8191,0.022627200186252593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,8191,0.020630399882793426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,8191,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,8191,0.0247311994433403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,8191,0.01733279973268509
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,8191,0.01703999936580658
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,16383,0.05074560046195984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,8191,0.0166703999042511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,16383,0.05827680230140686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,16383,0.04627839922904968
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,16383,0.04548319876194
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,16383,0.045187199115753175
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,16383,0.03075360059738159
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,16383,0.043961599469184875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,16383,0.046028798818588255
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,16383,0.03296479880809784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,16383,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,16383,0.0227183997631073
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,16383,0.02281759977340698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,32767,0.09611200094223023
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,32767,0.07403839826583862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,16383,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,32767,0.07221599817276
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,16383,0.022745600342750548
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,32767,0.07303680181503296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,32767,0.08125920295715332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,32767,0.0715183973312378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,32767,0.046438398957252505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,32767,0.06996639966964721
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,32767,0.0411215990781784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,32767,0.05149120092391968
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,32767,0.04099360108375549
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,32767,0.03914400041103363
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,32767,0.039857599139213565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,32767,0.03956159949302673
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,65535,0.16446880102157593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,65535,0.13262399435043334
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,65535,0.12496639490127563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,65535,0.12843199968338012
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,65535,0.12308000326156616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,65535,0.13073920011520385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,65535,0.08310880064964295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,65535,0.0630400002002716
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,65535,0.0637008011341095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,65535,0.12609599828720092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,65535,0.06238080263137817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,65535,0.0629423975944519
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,65535,0.06997759938240052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,131071,0.30656321048736573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,131071,0.24170079231262206
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,65535,0.06271520256996155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,131071,0.2253551959991455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,131071,0.2252255916595459
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,131071,0.11976959705352783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,131071,0.14715039730072021
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,131071,0.2215343952178955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,131071,0.2244096040725708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,131071,0.10674560070037842
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,131071,0.22346560955047606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,131071,0.1053007960319519
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,131071,0.1063696026802063
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,131071,0.10502400398254394
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,131071,0.10595999956130982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1,0.01247360035777092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1,0.011420799791812897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,3,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,3,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,3,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,3,0.011446399986743927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,3,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,3,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,3,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,3,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,3,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,3,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,3,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,3,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,3,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,7,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,3,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,7,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,7,0.011161600053310395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,7,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,7,0.011950399726629257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,7,0.01141280010342598
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,7,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,7,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,7,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,7,0.012598399817943574
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,7,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,7,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,15,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,7,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,15,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,15,0.011204800009727478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,7,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,15,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,15,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,15,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,15,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,15,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,15,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,15,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,15,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,15,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,15,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,15,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,31,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,31,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,31,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,31,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,31,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,31,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,31,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,31,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,31,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,31,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,31,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,31,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,31,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,31,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,63,0.01276639997959137
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,63,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,63,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,63,0.010513599961996078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,63,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,63,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,63,0.011443199962377549
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,63,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,63,0.012270399928092956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,63,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,63,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,63,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,63,0.010340800136327743
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,63,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,127,0.013096000254154205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,127,0.012596799433231354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,127,0.011228799819946289
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,127,0.011129599809646607
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,127,0.012415999919176102
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,127,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,127,0.011316800117492675
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,127,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,127,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,127,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,127,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,127,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,127,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,255,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,127,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,255,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,255,0.010603199899196624
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,255,0.012142399698495865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,255,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,255,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,255,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,255,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,255,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,255,0.010665600001811982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,255,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,255,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,255,0.010540799796581268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,511,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,511,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,255,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,511,0.014428800344467163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,511,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,511,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,511,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,511,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,511,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,511,0.012670400738716125
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,511,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,511,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,511,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,511,0.011800000071525573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1023,0.020755200088024138
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1023,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1023,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,511,0.011512000113725662
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1023,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1023,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1023,0.014604799449443817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1023,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1023,0.014539200067520141
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1023,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1023,0.016681599617004394
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1023,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1023,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1023,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1023,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,2047,0.028815999627113342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,2047,0.018614399433135986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,2047,0.0176704004406929
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,2047,0.017190399765968322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,2047,0.020739200711250304
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,2047,0.016777600347995757
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,2047,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,2047,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,2047,0.01679839938879013
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,2047,0.02072799950838089
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,2047,0.014496000111103058
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,2047,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,2047,0.014467200636863709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,4095,0.028964799642562867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,2047,0.014532800018787383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,4095,0.020630399882793426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,4095,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,4095,0.030166399478912354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,4095,0.020692799985408784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,4095,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,4095,0.018619200587272643
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,4095,0.020633600652217865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,4095,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,4095,0.022779199481010436
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,4095,0.016649599373340606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,4095,0.01581919938325882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,8191,0.05005279779434204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,4095,0.0165583997964859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,4095,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,8191,0.05428640246391296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,8191,0.04518879950046539
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,8191,0.04395360052585602
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,8191,0.04352959990501404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,8191,0.04317440092563629
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,8191,0.04490880072116852
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,8191,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,8191,0.02147040069103241
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,8191,0.0289247989654541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,8191,0.03031199872493744
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,8191,0.020739200711250304
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,8191,0.021163199841976166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,8191,0.020953600108623505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,16383,0.07609760165214538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,16383,0.0727407991886139
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,16383,0.09329119920730591
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,16383,0.07124159932136535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,16383,0.06976159811019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,16383,0.0721552014350891
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,16383,0.07180479764938355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,16383,0.03957920074462891
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,16383,0.038815999031066896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,16383,0.04939840137958527
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,16383,0.03910239934921265
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,16383,0.03787679970264435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,16383,0.04534560143947601
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,32767,0.1703503966331482
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,16383,0.03882719874382019
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,32767,0.13326879739761352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,32767,0.12429120540618896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,32767,0.1284000039100647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,32767,0.12572000026702881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,32767,0.1258239984512329
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,32767,0.06839039921760559
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,32767,0.06047199964523316
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,32767,0.06036319732666016
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,32767,0.12112480401992798
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,32767,0.0597055971622467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,32767,0.08606879711151123
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,32767,0.05955680012702942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,65535,0.246561598777771
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,65535,0.32767999172210693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,65535,0.2282032012939453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,32767,0.061990398168563846
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,65535,0.2297136068344116
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,65535,0.22523200511932373
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,65535,0.2264159917831421
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,65535,0.22799038887023926
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,65535,0.11923680305480958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,65535,0.15543839931488038
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,65535,0.10642080307006836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,65535,0.10536799430847169
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,65535,0.10467840433120727
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,65535,0.10462559461593628
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,65535,0.10378719568252563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,131071,0.6393152236938476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,131071,0.42634081840515137
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,131071,0.4229423999786377
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,131071,0.4602191925048828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,131071,0.4229248046875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,131071,0.42223200798034666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,131071,0.19200799465179444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,131071,0.18905600309371948
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,131071,0.22603039741516112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,131071,0.4259200096130371
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,131071,0.18887360095977784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,131071,0.18874080181121827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,131071,0.3019615888595581
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,131071,0.18882240056991578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1,0.012563200294971466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1,0.012697599828243256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1,0.011444800347089768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1,0.011028800159692764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,3,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1,0.011267200112342834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,3,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,3,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,3,0.012583999335765839
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,3,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,3,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,3,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,3,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,3,0.012755200266838074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,3,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,3,0.011881600320339202
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,3,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,3,0.010371199995279311
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,3,0.010555200278759003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,7,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,7,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,7,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,7,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,7,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,7,0.012043199688196182
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,7,0.012558400630950928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,7,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,7,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,7,0.012585599720478059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,7,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,7,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,7,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,7,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,15,0.012580800056457519
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,15,0.014523200690746307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,15,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,15,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,15,0.012415999919176102
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,15,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,15,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,15,0.012567999958992004
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,15,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,15,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,15,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,15,0.011448000371456147
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,15,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,15,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,31,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,31,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,31,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,31,0.012574400007724761
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,31,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,31,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,31,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,31,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,31,0.010814400017261505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,31,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,31,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,31,0.010596799850463866
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,31,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,31,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,63,0.014567999541759491
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,63,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,63,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,63,0.01257600039243698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,63,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,63,0.010590399801731109
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,63,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,63,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,63,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,63,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,63,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,63,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,63,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,63,0.010444799810647965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,127,0.014523200690746307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,127,0.01249919980764389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,127,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,127,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,127,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,127,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,127,0.01263200044631958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,127,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,127,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,127,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,127,0.012406399846076966
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,127,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,127,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,127,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,255,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,255,0.012571200728416443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,255,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,255,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,255,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,255,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,255,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,255,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,255,0.012569600343704223
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,255,0.011033599823713302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,255,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,255,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,255,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,255,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,511,0.01653279960155487
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,511,0.018937599658966065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,511,0.014683200418949128
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,511,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,511,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,511,0.015089599788188935
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,511,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,511,0.014646400511264802
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,511,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,511,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,511,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,511,0.012529599666595458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,511,0.014206400513648987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,511,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1023,0.02547839879989624
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1023,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1023,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1023,0.02080959975719452
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1023,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1023,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1023,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1023,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1023,0.018702399730682374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1023,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1023,0.013604800403118133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1023,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1023,0.013475200533866883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1023,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,2047,0.030972799658775328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,2047,0.02046239972114563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,2047,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,2047,0.020732800662517547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,2047,0.02691679894924164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,2047,0.019623999297618867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,2047,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,2047,0.020606400072574617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,2047,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,2047,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,2047,0.020744000375270844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,2047,0.014611199498176575
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,2047,0.015627199411392213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,2047,0.015199999511241912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,4095,0.04922240078449249
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,4095,0.042947199940681455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,4095,0.055534398555755614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,4095,0.04133760035037994
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,4095,0.0207056000828743
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,4095,0.029135999083518983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,4095,0.04148800075054169
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,4095,0.0268528014421463
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,4095,0.020340800285339355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,4095,0.04079360067844391
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,4095,0.019407999515533448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,4095,0.04128159880638123
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,4095,0.019526399672031403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,4095,0.019254399836063384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,8191,0.09455199837684632
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,8191,0.07017920017242432
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,8191,0.07674880027770996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,8191,0.06816480159759522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,8191,0.06836479902267456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,8191,0.0695855975151062
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,8191,0.04525119960308075
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,8191,0.05072640180587769
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,8191,0.0376911997795105
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,8191,0.03701600134372711
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,8191,0.06812639832496643
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,8191,0.037195199728012086
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,8191,0.03694719970226288
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,16383,0.1299839973449707
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,8191,0.03687680065631867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,16383,0.12298239469528198
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,16383,0.17244479656219483
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,16383,0.12121599912643433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,16383,0.07020639777183532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,16383,0.12197920083999633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,16383,0.08613759875297547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,16383,0.12106720209121705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,16383,0.11986720561981201
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,16383,0.06024640202522278
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,16383,0.059566402435302736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,16383,0.05925599932670593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,16383,0.05891039967536926
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,16383,0.05961920022964477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,32767,0.24230880737304689
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,32767,0.22904961109161376
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,32767,0.23161439895629882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,32767,0.22825279235839843
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,32767,0.22817599773406982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,32767,0.32769439220428465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,32767,0.22777280807495118
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,32767,0.10603200197219849
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,32767,0.10506080389022827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,32767,0.10465760231018066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,32767,0.15627039670944215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,32767,0.10460000038146973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,32767,0.12348480224609375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,32767,0.10468159914016724
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,65535,0.6409872055053711
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,65535,0.44126081466674805
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,65535,0.46922879219055175
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,65535,0.43878879547119143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,65535,0.4379439830780029
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,65535,0.4363071918487549
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,65535,0.23456799983978271
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,65535,0.30059199333190917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,65535,0.19454560279846192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,65535,0.19576799869537354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,65535,0.4356351852416992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,65535,0.19374239444732666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,65535,0.19550880193710327
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,65535,0.19470239877700807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1,0.013923199474811554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1,0.0126351997256279
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1,0.014619199931621552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1,0.012588800489902496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1,0.012571200728416443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1,0.012572799623012543
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1,0.012486399710178375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1,0.011497599631547928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,3,0.013867199420928955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,3,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1,0.01173119992017746
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,3,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,3,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,3,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,3,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,3,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,3,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,3,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,3,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,3,0.01080320030450821
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,3,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,7,0.013672000169754029
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,3,0.010807999968528747
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,3,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,7,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,7,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,7,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,7,0.012521600723266602
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,7,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,7,0.012574400007724761
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,7,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,7,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,7,0.010513599961996078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,7,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,7,0.012486399710178375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,7,0.01056160032749176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,15,0.014612799882888794
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,15,0.014348800480365752
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,15,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,15,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,15,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,15,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,15,0.012558400630950928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,15,0.012521600723266602
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,15,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,15,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,15,0.010579200088977813
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,15,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,31,0.014075200259685516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,15,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,31,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,15,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,31,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,31,0.012574400007724761
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,31,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,31,0.014588800072669984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,31,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,31,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,31,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,31,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,31,0.010550399869680404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,63,0.01435520052909851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,31,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,31,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,63,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,63,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,63,0.01284479945898056
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,63,0.012542399764060973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,31,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,63,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,63,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,63,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,63,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,63,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,63,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,63,0.010689599812030793
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,63,0.010601600259542465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,127,0.012673600018024445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,127,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,63,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,127,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,127,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,127,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,127,0.01353919953107834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,127,0.0126351997256279
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,127,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,127,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,127,0.012603199481964112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,127,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,127,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,255,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,127,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,255,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,127,0.010911999642848969
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,255,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,255,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,255,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,255,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,255,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,255,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,255,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,255,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,255,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,255,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,255,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,255,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,511,0.02271360009908676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,511,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,511,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,511,0.016641600430011748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,511,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,511,0.015561600029468537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,511,0.02066880017518997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,511,0.0150751993060112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,511,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,511,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,511,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,511,0.01340160071849823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1023,0.02887359857559204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,511,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,511,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1023,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1023,0.01870400011539459
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1023,0.019415999948978423
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1023,0.018628799915313722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1023,0.03095200061798096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1023,0.01879200041294098
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1023,0.022734400629997254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1023,0.01860000044107437
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1023,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1023,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1023,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1023,0.014459200203418732
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,2047,0.05137119889259338
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1023,0.01454080045223236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,2047,0.04274719953536987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,2047,0.04460479915142059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,2047,0.054343998432159424
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,2047,0.0422111988067627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,2047,0.04223519861698151
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,2047,0.043375998735427856
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,2047,0.02067199945449829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,2047,0.028966400027275085
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,2047,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,2047,0.018592000007629395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,2047,0.018648000061511995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,2047,0.01857600063085556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,2047,0.029204800724983215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,4095,0.09154239892959595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,4095,0.07020000219345093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,4095,0.06916800141334534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,4095,0.06977599859237671
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,4095,0.06976320147514344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,4095,0.06899359822273254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,4095,0.047328001260757445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,4095,0.07858880162239075
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,4095,0.04933600127696991
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,4095,0.0390720009803772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,4095,0.037308800220489505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,4095,0.03702400028705597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,4095,0.03712800145149231
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,4095,0.0369951993227005
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,8191,0.13206880092620848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,8191,0.12437280416488647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,8191,0.12332320213317871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,8191,0.12286720275878907
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,8191,0.16801600456237792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,8191,0.1221343994140625
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,8191,0.0728767991065979
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,8191,0.06190400123596192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,8191,0.060172802209854125
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,8191,0.05961120128631592
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,8191,0.0844319999217987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,8191,0.12180639505386352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,8191,0.059331202507019044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,8191,0.05950400233268738
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,16383,0.32194879055023196
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,16383,0.23231520652770996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,16383,0.24507999420166016
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,16383,0.22873120307922362
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,16383,0.2296447992324829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,16383,0.22710878849029542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,16383,0.1550943970680237
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,16383,0.10699520111083985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,16383,0.10578880310058594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,16383,0.1254256010055542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,16383,0.1046288013458252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,16383,0.22922239303588868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,16383,0.10467519760131835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,16383,0.1046064019203186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,32767,0.47426400184631345
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,32767,0.6284736156463623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,32767,0.4427152156829834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,32767,0.43946399688720705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,32767,0.43966240882873536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,32767,0.4379312038421631
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,32767,0.29955039024353025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,32767,0.23690400123596192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,32767,0.19706079959869385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,32767,0.437497615814209
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,32767,0.19580639600753785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,32767,0.19423840045928956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,32767,0.19520479440689087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1,0.016604800522327424
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,32767,0.1953935980796814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1,0.020745599269866945
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1,0.013166399300098419
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1,0.01316000074148178
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1,0.012664000689983367
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1,0.012929600477218629
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1,0.01653439998626709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1,0.018092800676822663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1,0.012403199821710587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,3,0.020502400398254395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,3,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,3,0.01666879951953888
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,3,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,3,0.01658560037612915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,3,0.01690559983253479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,3,0.014305600523948669
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,3,0.012868799269199371
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,3,0.01385280042886734
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,3,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,3,0.01244639977812767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,3,0.010718400031328202
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,7,0.016577599942684172
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,3,0.012555199861526489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,3,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,7,0.020665599405765532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,7,0.014467200636863709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,7,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,7,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,7,0.01390720009803772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,7,0.01390720009803772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,7,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,7,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,7,0.017150400578975676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,7,0.0166143998503685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,7,0.011953599750995636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,7,0.011113599687814713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,7,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,15,0.01656160056591034
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,15,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,15,0.020822399854660036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,15,0.013592000305652618
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,15,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,15,0.014481599628925323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,15,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,15,0.01664319932460785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,15,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,15,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,15,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,15,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,15,0.01247360035777092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,15,0.011212799698114395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,31,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,31,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,31,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,31,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,31,0.0166703999042511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,31,0.020718400180339814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,31,0.014468799531459808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,31,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,31,0.018167999386787415
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,31,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,31,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,31,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,63,0.01661760061979294
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,31,0.011444800347089768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,31,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,63,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,63,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,63,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,63,0.020652799308300017
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,63,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,63,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,63,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,63,0.015436799824237823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,63,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,63,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,63,0.017892800271511078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,63,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,63,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,127,0.016579200327396394
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,127,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,127,0.014459200203418732
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,127,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,127,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,127,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,127,0.017550399899482726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,127,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,127,0.014486399292945863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,127,0.016310399770736693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,127,0.012404800206422806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,127,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,255,0.018651199340820313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,255,0.020656000077724456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,255,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,127,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,255,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,255,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,255,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,255,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,255,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,127,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,255,0.018505600094795228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,255,0.012244799733161926
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,255,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,255,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,255,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,255,0.011355199664831162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,511,0.020627200603485107
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,511,0.026870399713516235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,511,0.01858399957418442
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,511,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,511,0.03394080102443695
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,511,0.018027199804782866
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,511,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,511,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,511,0.02268480062484741
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,511,0.014523200690746307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,511,0.014470399916172027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,511,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,511,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1023,0.04848000109195709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1023,0.043131199479103086
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,511,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1023,0.05557439923286438
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1023,0.041631999611854556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1023,0.04073440134525299
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1023,0.04116640090942383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1023,0.026833599805831908
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1023,0.03994399905204773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1023,0.03198240101337433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1023,0.01865919977426529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1023,0.01857759952545166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,2047,0.07542399764060974
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1023,0.016942399740219116
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1023,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1023,0.018675200641155243
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,2047,0.09524160027503967
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,2047,0.0683135986328125
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,2047,0.06837279796600342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,2047,0.06719840168952942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,2047,0.06782400012016296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,2047,0.06682080030441284
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,2047,0.05190879702568054
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,2047,0.03639039993286133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,2047,0.04534080028533936
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,2047,0.03651680052280426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,2047,0.03706879913806915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,2047,0.03601120114326477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,2047,0.03507519960403442
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,4095,0.12945760488510133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,4095,0.12125439643859863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,4095,0.1217919945716858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,4095,0.12105120420455932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,4095,0.17466559410095214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,4095,0.11918720006942748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,4095,0.07050880193710327
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,4095,0.08802400231361389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,4095,0.060444802045822144
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,4095,0.05957279801368713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,4095,0.11928160190582275
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,4095,0.05926399827003479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,8191,0.23813600540161134
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,4095,0.05949599742889404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,4095,0.058526402711868285
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,8191,0.22505600452423097
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,8191,0.22845280170440674
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,8191,0.32798879146575927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,8191,0.2271791934967041
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,8191,0.22450881004333495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,8191,0.12446399927139282
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,8191,0.22571840286254882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,8191,0.10452159643173217
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,8191,0.1594256043434143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,8191,0.10388799905776977
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,8191,0.1042512059211731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,8191,0.10535680055618286
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,8191,0.10430879592895508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,16383,0.6308544158935547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,16383,0.43352479934692384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,16383,0.47023677825927734
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,16383,0.43435039520263674
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,16383,0.4351168155670166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,16383,0.4364431858062744
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,16383,0.23220961093902587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,16383,0.3028784036636353
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,16383,0.4338528156280518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,16383,0.19511200189590455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,16383,0.1936463952064514
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,16383,0.1942463994026184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1,0.024758400022983552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1,0.031020799279212953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,16383,0.19368159770965576
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,16383,0.19381760358810424
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1,0.020670400559902193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1,0.020043200254440306
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1,0.018700799345970152
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1,0.020110400021076204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1,0.02298559993505478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1,0.01791999936103821
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1,0.01623679995536804
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1,0.018811200559139252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1,0.026918399333953857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,3,0.024726399779319765
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1,0.015736000239849092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,3,0.03107840120792389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,3,0.020737600326538087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,3,0.01980320066213608
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,3,0.019739200174808503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,3,0.022806400060653688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,3,0.019390399754047393
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,3,0.01929119974374771
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,3,0.02689119875431061
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,3,0.016572800278663636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,3,0.017684799432754517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,3,0.01658080071210861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,3,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,7,0.03293119966983795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,3,0.016595199704170227
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,7,0.02473440021276474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,7,0.02035519927740097
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,7,0.020785599946975708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,7,0.01923999935388565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,7,0.018692800402641298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,7,0.019497600197792054
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,7,0.016548800468444824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,7,0.02292959988117218
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,7,0.0158160001039505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,7,0.014640000462532044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,7,0.026859200000762938
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,15,0.024728000164031982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,7,0.015022400021553039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,7,0.01855359971523285
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,15,0.03201279938220978
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,15,0.01892160028219223
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,15,0.020401600003242492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,15,0.02083519995212555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,15,0.02279839962720871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,15,0.019998399913311003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,15,0.020151999592781068
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,15,0.018606400489807128
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,15,0.026872000098228453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,15,0.016545599699020384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,15,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,15,0.01653759926557541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,31,0.02484000027179718
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,15,0.016539199650287627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,31,0.02067199945449829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,31,0.019920000433921815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,31,0.033032000064849854
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,31,0.019806399941444397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,31,0.02285120040178299
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,31,0.02012320011854172
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,31,0.020744000375270844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,31,0.0183119997382164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,31,0.016513599455356597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,31,0.02687999904155731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,31,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,63,0.02489439994096756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,63,0.020734399557113647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,31,0.015880000591278077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,31,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,63,0.03304159939289093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,63,0.019467200338840484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,63,0.020644800364971162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,63,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,63,0.023017600178718567
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,63,0.020235200226306916
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,63,0.01663520038127899
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,63,0.01855839937925339
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,63,0.02688480019569397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,63,0.016254399716854096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,63,0.016441600024700166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,127,0.026785600185394286
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,127,0.03225440084934235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,127,0.020815999805927278
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,63,0.01650079935789108
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,127,0.018718400597572328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,127,0.020448000729084016
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,127,0.0205375999212265
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,127,0.02080159932374954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,127,0.026892799139022826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,127,0.022782400250434875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,127,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,127,0.01563040018081665
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,127,0.01611199975013733
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,127,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,127,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,255,0.03725599944591522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,255,0.03775199949741363
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,255,0.020372800529003143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,255,0.022147199511528014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,255,0.019516800343990327
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,255,0.020472000539302825
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,255,0.02043360024690628
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,255,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,255,0.01659200042486191
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,255,0.026969599723815917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,255,0.015038399398326874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,255,0.014996799826622009
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,255,0.026897600293159483
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,255,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,511,0.0443807989358902
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,511,0.05960800051689148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,511,0.04121760129928589
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,511,0.04251680076122284
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,511,0.03913280069828033
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,511,0.053609597682952884
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,511,0.04209760129451752
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,511,0.04100480079650879
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,511,0.03747360110282898
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,511,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,511,0.020662400126457214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,511,0.02046400010585785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,511,0.020654399693012238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1023,0.08346719741821289
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1023,0.07114560008049012
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1023,0.09845920205116272
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1023,0.06825439929962158
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1023,0.06792960166931153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1023,0.06833279728889466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1023,0.068259197473526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1023,0.05588639974594116
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1023,0.037427198886871335
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1023,0.03698239922523498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1023,0.052455997467041014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1023,0.03699040114879608
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1023,0.040092799067497256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1023,0.03684319853782654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,511,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,2047,0.13718559741973876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,2047,0.17685920000076294
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,2047,0.12316160202026367
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,2047,0.12039359807968139
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,2047,0.12026560306549072
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,2047,0.11878559589385987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,2047,0.11917760372161865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,2047,0.09222080111503601
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,2047,0.06438720226287842
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,2047,0.06185439825057983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,2047,0.06120319962501526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,4095,0.2471535921096802
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,2047,0.06065760254859924
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,2047,0.06121600270271301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,4095,0.22707839012145997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,4095,0.32768640518188474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,4095,0.2236191987991333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,4095,0.22483038902282715
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,2047,0.08055999875068665
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,4095,0.22313919067382812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,4095,0.16397119760513307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,4095,0.11071200370788574
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,4095,0.22338240146636962
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,4095,0.13400319814682007
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,4095,0.10789920091629028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,4095,0.10656800270080566
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,4095,0.10615839958190917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,8191,0.4729680061340332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,8191,0.6314479827880859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,8191,0.437007999420166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,4095,0.10685919523239136
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,8191,0.431763219833374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,8191,0.4286719799041748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,8191,0.428223991394043
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,8191,0.4319039821624756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,8191,0.24431519508361815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,8191,0.30617120265960696
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,8191,0.19659039974212647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,8191,0.20068800449371338
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,8191,0.19630399942398072
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,8191,0.19724160432815552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1,0.053067201375961305
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1,0.04521119892597199
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1,0.03232159912586212
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,8191,0.1980191946029663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1,0.02953760027885437
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1,0.02894560098648071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1,0.03911679983139038
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1,0.04504800140857697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1,0.02969439923763275
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1,0.03060320019721985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1,0.026888000965118408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1,0.02274879962205887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1,0.02361599951982498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1,0.022742399573326112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1,0.022673599421977997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,3,0.045228800177574156
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,3,0.05359359979629517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,3,0.032969599962234496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,3,0.0289792001247406
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,3,0.030248001217842102
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,3,0.04458560049533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,3,0.029625600576400755
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,3,0.039340800046920775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,3,0.03054400086402893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,3,0.022838400304317476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,3,0.023284800350666046
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,3,0.023270399868488313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,3,0.026878398656845093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,7,0.045284798741340636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,7,0.053832000494003295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,7,0.02977119982242584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,3,0.022676800191402436
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,7,0.03324480056762695
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,7,0.030670401453971863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,7,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,7,0.04387199878692627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,7,0.030883198976516722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,7,0.030851200222969055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,7,0.026855999231338502
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,7,0.02276480048894882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,7,0.022787199914455415
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,7,0.022991999983787537
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,15,0.0453247994184494
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,15,0.0298880010843277
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,15,0.030865600705146788
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,15,0.0537168025970459
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,15,0.033036801218986514
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,7,0.022784000635147093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,15,0.045259198546409606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,15,0.0408048003911972
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,15,0.029462400078773498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,15,0.02736000120639801
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,15,0.030910399556159974
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,15,0.023604799807071686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,15,0.022838400304317476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,15,0.023025600612163542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,31,0.04604640007019043
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,31,0.03303999900817871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,31,0.05506399869918823
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,15,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,31,0.030884799361228944
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,31,0.030270400643348693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,31,0.030881598591804504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,31,0.04103200137615204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,31,0.045132800936698914
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,31,0.031011199951171874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,31,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,31,0.02289759963750839
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,63,0.04693439900875092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,31,0.023668800294399262
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,31,0.02268799990415573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,31,0.023529599606990814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,63,0.0558351993560791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,63,0.033004799485206605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,63,0.029971200227737426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,63,0.029047998785972595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,63,0.030054399371147157
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,63,0.026955199241638184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,63,0.04110080003738403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,63,0.030862399935722352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,63,0.02279839962720871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,63,0.044363200664520264
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,63,0.024743999540805816
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,63,0.02279040068387985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,127,0.049568000435829165
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,63,0.022728000581264497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,127,0.030588799715042116
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,127,0.058315199613571164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,127,0.03721440136432648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,127,0.029259198904037477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,127,0.030990400910377504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,127,0.04316479861736298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,127,0.030953601002693176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,127,0.04529280066490173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,127,0.02686080038547516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,127,0.024007999897003175
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,127,0.02281759977340698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,255,0.06586880087852479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,255,0.0432671993970871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,127,0.022819200158119203
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,255,0.04827040135860443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,255,0.06185920238494873
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,255,0.04274879992008209
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,255,0.04214400053024292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,255,0.04306559860706329
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,127,0.024027200043201448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,255,0.04736000001430511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,255,0.05161600112915039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,255,0.02890399992465973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,255,0.024513599276542664
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,255,0.022865599393844603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,255,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,511,0.09463199973106384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,511,0.09863359928131103
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,255,0.022868800163269042
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,511,0.07477759718894958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,511,0.07162399888038636
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,511,0.06471520066261291
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,511,0.07100160121917724
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,511,0.07180799841880799
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,511,0.06988160014152527
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,511,0.0622111976146698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,511,0.03934879899024964
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,511,0.03907040059566498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,511,0.040801599621772766
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,511,0.03914240002632141
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,511,0.0441103994846344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1023,0.12820160388946533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1023,0.16936639547348023
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1023,0.15000799894332886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1023,0.12289600372314453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1023,0.12362560033798217
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1023,0.12148480415344239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1023,0.09503520131111146
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1023,0.12111200094223022
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1023,0.06823359727859497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1023,0.0635424017906189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1023,0.06314399838447571
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1023,0.061766397953033444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1023,0.09157919883728027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1023,0.06165760159492493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,2047,0.2623280048370361
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,2047,0.23167040348052978
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,2047,0.22276480197906495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,2047,0.2255120038986206
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,2047,0.3124000072479248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,2047,0.14670079946517944
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,2047,0.22307040691375732
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,2047,0.22473759651184083
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,2047,0.11098719835281372
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,2047,0.1155168056488037
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,2047,0.15998879671096802
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,2047,0.10937119722366333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,2047,0.1080623984336853
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,2047,0.10868799686431885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,4095,0.4413951873779297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,4095,0.4951183795928955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,4095,0.5859551906585694
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,4095,0.4296000003814697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,4095,0.4322927951812744
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,4095,0.4333471775054932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,4095,0.43150877952575684
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,4095,0.2940351963043213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,4095,0.25660479068756104
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,4095,0.20309760570526122
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,4095,0.20007359981536865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1,0.078302401304245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,4095,0.1992848038673401
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,4095,0.20908160209655763
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1,0.10107680559158325
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,4095,0.2007200002670288
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1,0.04906240105628967
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1,0.05456960201263428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1,0.049116799235343934
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1,0.048091199994087216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1,0.08041759729385375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1,0.049379199743270874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1,0.07332320213317871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1,0.03905439972877502
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1,0.03705599904060364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1,0.03711200058460236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,3,0.07907519936561584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1,0.04557119905948639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,3,0.04972479939460754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,3,0.04930559992790222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,3,0.055318397283554074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,3,0.10018880367279052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,3,0.04899519979953766
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,3,0.07216640114784241
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,3,0.045814400911331175
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,3,0.049423998594284056
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,3,0.08043519854545593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,3,0.03908160030841827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,7,0.07972800135612487
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,3,0.03700959980487824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,7,0.09999679923057556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,3,0.037036800384521486
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,3,0.037136000394821164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,7,0.055430400371551516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,7,0.04955199956893921
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,7,0.04930559992790222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,7,0.07235519886016846
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,7,0.08156639933586121
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,7,0.04979360103607178
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,7,0.04930399954319
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,7,0.046014401316642764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,7,0.03918719887733459
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,7,0.037118399143218996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,7,0.03704000115394592
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,7,0.03711200058460236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,15,0.050387197732925416
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,15,0.10315359830856323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,15,0.05108640193939209
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,15,0.04942879974842072
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,15,0.07838720083236694
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,15,0.07358400225639343
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,15,0.05537440180778504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,15,0.049420800805091855
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,15,0.0820032000541687
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,15,0.0375216007232666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,15,0.039175999164581296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,15,0.03702079951763153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,15,0.04574880003929138
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,15,0.03714880049228668
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,31,0.07809759974479676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,31,0.057601600885391235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,31,0.05144320130348205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,31,0.0496288001537323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,31,0.04939360022544861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,31,0.049409601092338565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,31,0.10212639570236207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,31,0.07388160228729249
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,31,0.046823999285697936
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,31,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,31,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,31,0.03731360137462616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,31,0.08173279762268067
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,31,0.03713279962539673
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,63,0.05323359966278076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,63,0.05947359800338745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,63,0.050128000974655154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,63,0.100382399559021
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,63,0.049534401297569274
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,63,0.07926239967346191
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,63,0.04938080012798309
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,63,0.04715520143508911
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,63,0.03919360041618347
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,63,0.07409120202064515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,63,0.08269919753074646
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,63,0.03712959885597229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,63,0.03711360096931458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,127,0.08882240056991578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,127,0.10446560382843018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,127,0.06376479864120484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,127,0.055664002895355225
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,63,0.037038400769233704
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,127,0.058543998003005984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,127,0.05555199980735779
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,127,0.05428479909896851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,127,0.04933600127696991
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,127,0.08261280059814453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,127,0.07427359819412231
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,127,0.04067519903182983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,127,0.0371535986661911
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,127,0.037529599666595456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,127,0.03851679861545563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,255,0.11008000373840332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,255,0.07846559882164002
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,255,0.11291680335998536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,255,0.07099040150642395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,255,0.07192000150680541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,255,0.06857759952545166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,255,0.08158559799194336
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,255,0.04220480024814606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,255,0.05153759717941284
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,255,0.0678063988685608
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,255,0.04374879896640778
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,255,0.08819040060043334
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,255,0.041196799278259276
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,255,0.04126720130443573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,511,0.16612000465393068
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,511,0.1841279983520508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,511,0.13055360317230225
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,511,0.11909600496292114
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,511,0.12190560102462769
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,511,0.12043360471725464
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,511,0.12023359537124634
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,511,0.11059360504150391
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,511,0.07303680181503296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,511,0.11027359962463379
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,511,0.06637759804725647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,511,0.0636672019958496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,511,0.06202080249786377
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,511,0.06298720240592956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1023,0.327891206741333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1023,0.2261296033859253
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1023,0.2147520065307617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1023,0.27220160961151124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1023,0.21829440593719482
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1023,0.21302719116210939
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1023,0.17606879472732545
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1023,0.1176367998123169
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1023,0.2123136043548584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1023,0.11080800294876099
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1023,0.16270400285720826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1023,0.1073248028755188
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1023,0.10482879877090454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1023,0.10578720569610596
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,2047,0.49138717651367186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,2047,0.424345588684082
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,2047,0.6084208011627197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,2047,0.4143216133117676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,2047,0.41042242050170896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,2047,0.407041597366333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,2047,0.3107471942901611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,2047,0.40721759796142576
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,2047,0.27673280239105225
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,2047,0.20646240711212158
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,2047,0.19831039905548095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,2047,0.19154880046844483
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1,0.010897599905729295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,2047,0.1933583974838257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,2047,0.1949615955352783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1,0.008446399867534638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,3,0.010542400181293488
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1,0.009241600334644318
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,3,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,3,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,3,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,3,0.010531199723482132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,3,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,3,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,3,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,3,0.010447999835014344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,3,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,3,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,3,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,7,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,3,0.0103472001850605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,3,0.009375999867916106
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,7,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,7,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,7,0.010795199871063232
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,7,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,7,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,7,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,7,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,7,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,7,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,7,0.008463999629020691
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,7,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,7,0.008895999938249587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,15,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,15,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,15,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,15,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,15,0.009312000125646591
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,15,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,15,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,15,0.009672000259160995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,15,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,15,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,15,0.008417599648237229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,15,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,31,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,15,0.008459199965000153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,31,0.010465600341558457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,15,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,31,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,31,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,31,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,31,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,31,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,31,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,31,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,31,0.008451200276613235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,63,0.011147200316190719
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,31,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,31,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,63,0.01080159991979599
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,31,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,31,0.008454400300979614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,63,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,63,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,63,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,63,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,63,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,63,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,63,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,63,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,63,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,63,0.008985599875450135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,63,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,63,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,127,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,127,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,127,0.01093600019812584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,127,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,127,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,127,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,127,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,127,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,127,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,127,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,127,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,127,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,127,0.009167999774217606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,127,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,255,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,255,0.010903999954462052
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,255,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,255,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,255,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,255,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,255,0.010353600233793258
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,255,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,255,0.010830400139093399
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,255,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,255,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,255,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,255,0.008425600081682205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,511,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,255,0.009091199934482574
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,511,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,511,0.013583999872207642
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,511,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,511,0.012299200147390365
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,511,0.01080320030450821
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,511,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,511,0.01170400008559227
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,511,0.012324800342321396
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,511,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,511,0.011403200030326844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,511,0.010531199723482132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,511,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,511,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1023,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1023,0.015956799685955047
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1023,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1023,0.0135903999209404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1023,0.014627200365066529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1023,0.013070400059223174
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1023,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1023,0.014496000111103058
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1023,0.01371839940547943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1023,0.013252800703048706
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1023,0.012590399384498597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1023,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,2047,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1023,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,2047,0.016655999422073364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,2047,0.0127920001745224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,2047,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,2047,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,2047,0.012904000282287598
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1023,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,2047,0.015803200006484986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,2047,0.01454399973154068
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,2047,0.014508800208568573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,2047,0.01276959925889969
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,2047,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,2047,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,2047,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,4095,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,4095,0.020751999318599702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,4095,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,2047,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,4095,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,4095,0.016524800658226015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,4095,0.014569599926471711
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,4095,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,4095,0.018644799292087556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,4095,0.014496000111103058
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,4095,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,4095,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,8191,0.024699200689792634
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,4095,0.01273919939994812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,4095,0.014580799639225006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,8191,0.02627840042114258
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,4095,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,8191,0.01660960018634796
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,8191,0.014796799421310425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,8191,0.018676799535751343
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,8191,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,8191,0.01641920059919357
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,8191,0.018681600689888
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,8191,0.014804799854755402
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,8191,0.014504000544548035
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,8191,0.020747199654579163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,8191,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,8191,0.016564799845218657
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,16383,0.027225598692893982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,8191,0.014614400267601014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,16383,0.02651839852333069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,16383,0.020241600275039674
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,16383,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,16383,0.020732800662517547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,16383,0.03929280042648316
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,16383,0.019644799828529357
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,16383,0.020633600652217865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,16383,0.018667200207710268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,16383,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,16383,0.0268528014421463
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,16383,0.017348800599575043
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,16383,0.01686079949140549
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,32767,0.03294079899787903
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,32767,0.04015839993953705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,16383,0.018188799917697906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,32767,0.030985599756240843
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,32767,0.02781279981136322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,32767,0.027521601319313048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,32767,0.0270224004983902
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,32767,0.030713599920272828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,32767,0.02731359899044037
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,32767,0.022750400006771088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,32767,0.026924800872802735
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,32767,0.022457599639892578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,32767,0.023721599578857423
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,32767,0.02272160053253174
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,32767,0.02268480062484741
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,65535,0.037462401390075686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,65535,0.03922399878501892
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,65535,0.043572801351547244
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,65535,0.03537119925022125
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,65535,0.03505280017852783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,65535,0.03623520135879517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,65535,0.02892799973487854
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,65535,0.030988800525665283
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,65535,0.03304960131645203
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,65535,0.03470079898834229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,65535,0.02683840095996857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,131071,0.05556319952011109
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,131071,0.06065599918365479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,65535,0.026811200380325317
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,65535,0.026956799626350402
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,65535,0.027294400334358215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,131071,0.07089599967002869
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,131071,0.053518402576446536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,131071,0.05355839729309082
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,131071,0.054711997509002686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,131071,0.05547040104866028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,131071,0.041331198811531064
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,131071,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,131071,0.03299199938774109
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,131071,0.03298400044441223
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,131071,0.04143199920654297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,131071,0.032979199290275575
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,131071,0.032691198587417605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1,0.009444800019264222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1,0.010411199927330018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1,0.008451200276613235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,3,0.010659199953079224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,3,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,3,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,3,0.011086399853229522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,3,0.010369600355625152
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,3,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,3,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,3,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,3,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,3,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,3,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,3,0.009380800276994705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,3,0.008486399799585343
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,3,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,7,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,7,0.010659199953079224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,7,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,7,0.011816000193357467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,7,0.010339199751615524
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,7,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,7,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,7,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,7,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,7,0.008478400111198426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,7,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,7,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,7,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,15,0.011003199964761734
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,7,0.010040000081062317
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,15,0.01053759977221489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,15,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,15,0.010609599947929382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,15,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,15,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,15,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,15,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,15,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,15,0.010391999781131745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,15,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,31,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,31,0.010609599947929382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,15,0.010462400317192078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,15,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,15,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,31,0.010662399977445603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,31,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,31,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,31,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,31,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,31,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,31,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,31,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,31,0.009408000111579894
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,31,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,31,0.00936639979481697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,31,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,63,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,63,0.011852800101041793
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,63,0.010927999764680863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,63,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,63,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,63,0.010367999970912933
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,63,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,63,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,63,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,63,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,63,0.010275200009346008
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,63,0.00888959988951683
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,63,0.00942559987306595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,127,0.012172800302505494
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,63,0.010356800258159637
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,127,0.011528000235557556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,127,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,127,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,127,0.010363200306892395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,127,0.010526400059461594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,127,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,127,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,127,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,127,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,127,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,127,0.008404800295829773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,127,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,255,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,127,0.00865119993686676
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,255,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,255,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,255,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,255,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,255,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,255,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,255,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,255,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,255,0.010380800068378448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,255,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,255,0.009444800019264222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,511,0.012577599287033081
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,255,0.009945599734783173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,255,0.00841279998421669
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,511,0.014112000167369843
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,511,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,511,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,511,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,511,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,511,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,511,0.012404800206422806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,511,0.012371200323104858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,511,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,511,0.011430399864912033
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,511,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,511,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,511,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1023,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1023,0.014534400403499603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1023,0.016422399878501893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1023,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1023,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1023,0.012558400630950928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1023,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1023,0.013977600634098053
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1023,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1023,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1023,0.011508800089359283
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1023,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1023,0.011428800225257874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1023,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,2047,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,2047,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,2047,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,2047,0.01713919937610626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,2047,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,2047,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,2047,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,2047,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,2047,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,2047,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,2047,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,2047,0.011124800145626067
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,4095,0.016569599509239197
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,2047,0.012443199753761292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,4095,0.01857440024614334
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,2047,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,4095,0.020716799795627593
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,4095,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,4095,0.01345279961824417
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,4095,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,4095,0.015121600031852723
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,4095,0.014582400023937226
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,4095,0.01656319946050644
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,4095,0.01470080018043518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,4095,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,4095,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,4095,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,8191,0.030772799253463747
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,8191,0.019012799859046935
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,4095,0.012414400279521943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,8191,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,8191,0.0208624005317688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,8191,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,8191,0.01658399999141693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,8191,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,8191,0.022668799757957457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,8191,0.01746560037136078
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,8191,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,8191,0.015315200388431548
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,16383,0.02690880000591278
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,8191,0.016521599888801575
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,8191,0.014679999649524688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,16383,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,16383,0.032625600695610046
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,8191,0.014601600170135499
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,16383,0.020446400344371795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,16383,0.020686399936676026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,16383,0.02110240012407303
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,16383,0.022694399952888487
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,16383,0.020636799931526183
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,16383,0.01889120042324066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,16383,0.024827200174331664
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,16383,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,16383,0.018603199720382692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,16383,0.01833280026912689
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,32767,0.033022400736808774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,32767,0.037217599153518674
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,32767,0.02890239953994751
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,32767,0.03711679875850678
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,16383,0.017843200266361235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,32767,0.026761600375175477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,32767,0.026759999990463256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,32767,0.03224799931049347
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,32767,0.026924800872802735
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,32767,0.026876801252365114
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,32767,0.022793599963188173
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,32767,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,32767,0.023097600042819976
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,32767,0.022868800163269042
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,32767,0.02886880040168762
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,65535,0.05070559978485108
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,65535,0.05745760202407837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,65535,0.05771039724349976
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,65535,0.051476800441741945
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,65535,0.05256159901618958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,65535,0.05193120241165161
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,65535,0.05493280291557312
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,65535,0.03511680066585541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,65535,0.03312000036239624
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,65535,0.02905920147895813
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,65535,0.028932800889015196
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,65535,0.030883198976516722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,65535,0.03639839887619019
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,65535,0.030990400910377504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,131071,0.07745440006256103
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,131071,0.08570880293846131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,131071,0.09278720021247863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,131071,0.07862079739570618
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,131071,0.07657600045204163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,131071,0.080731201171875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,131071,0.052001601457595824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,131071,0.05210559964179993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,131071,0.07964640259742736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,131071,0.053142398595809937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,131071,0.04527679979801178
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,131071,0.04646719992160797
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,131071,0.04797439873218536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,131071,0.0469184011220932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1,0.010436800122261048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1,0.011628799885511399
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1,0.009391999989748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,3,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,3,0.012217599898576736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,3,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,3,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,3,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,3,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,3,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,3,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,3,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,3,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,3,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,3,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,7,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,3,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,7,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,3,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,7,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,7,0.010708799958229065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,7,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,7,0.010515200346708298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,7,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,7,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,7,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,7,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,7,0.010355199873447418
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,7,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,15,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,15,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,7,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,7,0.009473600238561631
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,15,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,15,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,15,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,15,0.010364799946546554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,15,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,15,0.010726399719715118
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,15,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,15,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,15,0.00942239984869957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,15,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,15,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,15,0.010337600111961364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,31,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,31,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,31,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,31,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,31,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,31,0.010651200264692306
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,31,0.011079999804496764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,31,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,31,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,31,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,31,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,63,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,63,0.01263359934091568
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,31,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,31,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,31,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,63,0.0112015999853611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,63,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,63,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,63,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,63,0.010345599800348281
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,63,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,63,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,63,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,63,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,63,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,127,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,63,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,63,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,127,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,127,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,127,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,127,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,127,0.010531199723482132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,127,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,127,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,127,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,127,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,127,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,127,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,127,0.010372799634933472
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,127,0.00952799990773201
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,255,0.011196800321340562
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,255,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,255,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,255,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,255,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,255,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,255,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,255,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,255,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,255,0.011190400272607804
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,255,0.010388799756765366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,255,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,511,0.013577599823474885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,511,0.013729600608348847
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,511,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,511,0.012945599853992462
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,255,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,255,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,511,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,511,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,511,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,511,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,511,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,511,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,511,0.01247360035777092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1023,0.016628800332546233
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,511,0.010558400303125381
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,511,0.010443200170993806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1023,0.01639840006828308
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1023,0.0144896000623703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,511,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1023,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1023,0.012875199317932129
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1023,0.012875199317932129
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1023,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1023,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1023,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1023,0.012494400143623352
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1023,0.01403840035200119
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1023,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1023,0.01247360035777092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,2047,0.020611199736595153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1023,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,2047,0.017671999335289002
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,2047,0.016651199758052827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,2047,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,2047,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,2047,0.016624000668525696
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,2047,0.014548799395561219
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,2047,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,2047,0.012555199861526489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,2047,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,2047,0.01255359947681427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,2047,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,2047,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,2047,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,4095,0.028327998518943787
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,4095,0.020689600706100465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,4095,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,4095,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,4095,0.02064799964427948
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,4095,0.018636800348758698
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,4095,0.0186256006360054
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,4095,0.016652800142765045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,4095,0.016612799465656282
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,4095,0.01539359986782074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,4095,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,4095,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,4095,0.0157600000500679
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,8191,0.030972799658775328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,8191,0.022681599855422972
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,4095,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,8191,0.02476000040769577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,8191,0.01972319930791855
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,8191,0.01865279972553253
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,8191,0.01867839992046356
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,8191,0.017561599612236023
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,8191,0.022672000527381896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,8191,0.02149440050125122
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,8191,0.016681599617004394
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,8191,0.014563199877738953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,8191,0.01791519969701767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,8191,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,16383,0.0296640008687973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,8191,0.016651199758052827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,16383,0.0324288010597229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,16383,0.0350816011428833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,16383,0.02475520074367523
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,16383,0.022758400440216063
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,16383,0.02279199957847595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,16383,0.0262719988822937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,16383,0.02276960015296936
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,16383,0.022763200104236603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,16383,0.018729600310325622
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,16383,0.020715199410915375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,16383,0.0308351993560791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,16383,0.018705600500106813
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,16383,0.019497600197792054
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,32767,0.04932000041007996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,32767,0.04896959960460663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,32767,0.05142880082130432
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,32767,0.057359999418258666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,32767,0.04769119918346405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,32767,0.048451200127601624
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,32767,0.03303520083427429
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,32767,0.03235040009021759
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,32767,0.04938719868659973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,32767,0.026771199703216553
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,32767,0.03507040143013
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,32767,0.024830399453639983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,32767,0.026310399174690247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,65535,0.07642880082130432
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,32767,0.025115200877189638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,65535,0.09660639762878417
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,65535,0.07365760207176208
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,65535,0.07832480072975159
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,65535,0.07375680208206177
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,65535,0.07273439764976501
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,65535,0.049928000569343566
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,65535,0.04835039973258972
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,65535,0.04215039908885956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,65535,0.05248000025749207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,65535,0.07183200120925903
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,65535,0.04254559874534607
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,65535,0.04166719913482666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,131071,0.13569600582122804
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,131071,0.13424479961395264
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,65535,0.04121600091457367
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,131071,0.16487519741058348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,131071,0.12607840299606324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,131071,0.12318719625473022
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,131071,0.12451519966125488
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,131071,0.08330879807472229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,131071,0.12451839447021484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,131071,0.06386560201644897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,131071,0.07183520197868347
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,131071,0.0638863980770111
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,131071,0.08371040225028992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,131071,0.06377440094947814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,131071,0.06422240138053895
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1,0.012561599910259246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1,0.012600000202655792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1,0.010366400331258773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,3,0.01244800016283989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,3,0.010507199913263321
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,3,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,3,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,3,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,3,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,3,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,3,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,3,0.010830400139093399
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,3,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,3,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,7,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,3,0.010446400195360184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,3,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,7,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,3,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,7,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,7,0.011662399768829346
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,7,0.010441599786281586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,7,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,7,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,7,0.012740799784660339
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,7,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,7,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,7,0.010521599650382995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,7,0.01037440001964569
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,7,0.010377600044012069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,15,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,15,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,15,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,15,0.010375999659299851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,7,0.010406400263309478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,15,0.010763200372457505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,15,0.010710400342941285
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,15,0.010478399693965912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,15,0.012425599992275238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,15,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,15,0.010440000146627427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,15,0.010412800312042236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,31,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,31,0.011427199840545655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,31,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,15,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,31,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,15,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,31,0.01061599999666214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,15,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,31,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,31,0.010454399883747101
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,31,0.012409599870443344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,31,0.011321599781513213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,31,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,31,0.010476800054311753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,31,0.010396800190210342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,31,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,31,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,63,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,63,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,63,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,63,0.010795199871063232
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,63,0.010382399708032609
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,63,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,63,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,63,0.010558400303125381
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,63,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,63,0.010473600029945374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,63,0.010395199805498124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,63,0.010414399951696397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,63,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,63,0.01045600026845932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,127,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,127,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,127,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,127,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,127,0.010387200117111205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,127,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,127,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,127,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,127,0.0104592002928257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,127,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,127,0.010503999888896942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,127,0.010424000024795533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,127,0.010417599976062775
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,255,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,127,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,255,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,255,0.010550399869680404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,255,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,255,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,255,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,255,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,255,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,255,0.010486400127410889
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,255,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,255,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,255,0.010393600165843963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,255,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,255,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,511,0.014596800506114959
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,511,0.01552480012178421
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,511,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,511,0.012555199861526489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,511,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,511,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,511,0.013886399567127228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,511,0.012510399520397186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,511,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,511,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,511,0.010590399801731109
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,511,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,511,0.010463999956846238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1023,0.020611199736595153
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,511,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1023,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1023,0.01661120057106018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1023,0.014553600549697876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1023,0.014497600495815277
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1023,0.01409280002117157
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1023,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1023,0.016590400040149687
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1023,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1023,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1023,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,2047,0.0206496000289917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1023,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,2047,0.026881599426269533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1023,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1023,0.012457600235939026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,2047,0.018614399433135986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,2047,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,2047,0.016630400717258454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,2047,0.01655520051717758
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,2047,0.016540800034999848
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,2047,0.020708799362182617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,2047,0.014523200690746307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,2047,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,2047,0.01661919951438904
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,2047,0.012577599287033081
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,2047,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,4095,0.023576000332832338
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,2047,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,4095,0.020640000700950623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,4095,0.016676799952983858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,4095,0.018607999384403228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,4095,0.01736319959163666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,4095,0.028932800889015196
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,4095,0.01871200054883957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,4095,0.022158400714397432
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,4095,0.016638399660587312
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,4095,0.02065120041370392
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,4095,0.015641599893569946
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,4095,0.014579200744628906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,4095,0.014838400483131408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,8191,0.028972798585891725
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,8191,0.030067199468612672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,8191,0.02104160040616989
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,4095,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,8191,0.03333280086517334
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,8191,0.02065120041370392
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,8191,0.02245599925518036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,8191,0.022566400468349457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,8191,0.026393601298332216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,8191,0.022710399329662324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,8191,0.024860799312591553
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,8191,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,8191,0.01664479970932007
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,8191,0.01669120043516159
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,8191,0.018595199286937713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,16383,0.05739679932594299
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,16383,0.04739840030670166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,16383,0.04513440132141113
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,16383,0.050937598943710326
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,16383,0.045184001326560974
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,16383,0.04548799991607666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,16383,0.04601599872112274
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,16383,0.030969598889350893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,16383,0.03304960131645203
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,16383,0.03361920118331909
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,16383,0.022737599909305573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,16383,0.024222399294376373
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,32767,0.07975199818611145
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,16383,0.022750400006771088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,32767,0.07533599734306336
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,16383,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,32767,0.0721231997013092
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,32767,0.07232480049133301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,32767,0.09472320079803467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,32767,0.07203199863433837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,32767,0.0738319993019104
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,32767,0.04089120030403137
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,32767,0.046953600645065305
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,32767,0.05112000107765198
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,32767,0.049476799368858335
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,32767,0.03996799886226654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,32767,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,32767,0.03955360054969788
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,65535,0.1362895965576172
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,65535,0.12964799404144287
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,65535,0.12663840055465697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,65535,0.12893120050430298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,65535,0.12241439819335938
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,65535,0.16438239812850952
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,65535,0.12615840435028075
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,65535,0.08277440071105957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,65535,0.08341439962387084
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,65535,0.06340479850769043
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,65535,0.06297439932823182
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,65535,0.06269919872283936
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,65535,0.06979680061340332
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,65535,0.06175199747085571
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,131071,0.2372512102127075
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,131071,0.25298879146575926
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,131071,0.30569279193878174
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,131071,0.22369120121002198
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,131071,0.22680160999298096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,131071,0.22166240215301514
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,131071,0.15363680124282836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,131071,0.22296640872955323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,131071,0.12024159431457519
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,131071,0.106331205368042
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,131071,0.10578399896621704
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,131071,0.10590399503707885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,131071,0.14682719707489014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1,0.014475199580192565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,131071,0.10625599622726441
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1,0.010846400260925293
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1,0.01061440035700798
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1,0.01051200032234192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1,0.010384000092744827
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,3,0.011260800063610077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,3,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,3,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,3,0.014478400349617004
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,3,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,3,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,3,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,3,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,3,0.013607999682426453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,3,0.010470400005578995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,3,0.01048320010304451
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,7,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,3,0.010489600151777268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,3,0.010471999645233154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,3,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,7,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,7,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,7,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,7,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,7,0.01053439974784851
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,7,0.012571200728416443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,7,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,7,0.010438399761915207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,7,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,7,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,7,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,15,0.014766399562358857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,7,0.0124208003282547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,15,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,7,0.010523200035095215
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,15,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,15,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,15,0.012219200283288956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,15,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,15,0.012449599802494049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,15,0.010527999699115753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,15,0.01072160005569458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,15,0.01048479974269867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,15,0.010452800244092942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,15,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,15,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,15,0.01040479987859726
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,31,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,31,0.010838399827480315
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,31,0.01162080019712448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,31,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,31,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,31,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,31,0.010556799918413162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,31,0.011646399646997452
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,31,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,31,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,31,0.010505600273609162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,63,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,31,0.010494399815797806
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,63,0.014584000408649444
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,63,0.012585599720478059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,31,0.010398399829864503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,31,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,63,0.012515200674533844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,63,0.010425599664449692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,63,0.01138240024447441
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,63,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,63,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,63,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,63,0.010409600287675857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,63,0.01048159971833229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,63,0.011436799913644791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,63,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,63,0.010428799688816071
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,127,0.012564800679683685
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,127,0.014606399834156037
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,127,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,127,0.011025600135326385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,127,0.012492799758911132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,127,0.010497599840164185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,127,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,127,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,127,0.012513600289821625
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,127,0.011228799819946289
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,127,0.010407999902963639
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,127,0.0104032002389431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,127,0.010460799932479859
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,255,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,255,0.011980800330638886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,127,0.010449600219726563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,255,0.011240000277757645
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,255,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,255,0.011318399757146835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,255,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,255,0.0106175996363163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,255,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,255,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,255,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,255,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,255,0.010580799728631973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,255,0.010420800000429154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,255,0.010385599732398988
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,511,0.018649600446224213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,511,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,511,0.014571200311183929
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,511,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,511,0.014100800454616546
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,511,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,511,0.012569600343704223
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,511,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,511,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,511,0.012520000338554382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,511,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,511,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,511,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,511,0.01143840029835701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1023,0.020665599405765532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1023,0.016676799952983858
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1023,0.015000000596046448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1023,0.025492799282073975
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1023,0.01658719927072525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1023,0.014713600277900696
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1023,0.01570879966020584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1023,0.01865600049495697
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1023,0.016542400419712066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1023,0.014542399346828461
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1023,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1023,0.012585599720478059
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1023,0.01252799928188324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,2047,0.022716799378395082
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,2047,0.027451199293136597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,2047,0.020703999698162077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,2047,0.01857600063085556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1023,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,2047,0.01809120029211044
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,2047,0.016543999314308167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,2047,0.018854400515556334
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,2047,0.020710399746894835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,2047,0.01780479997396469
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,2047,0.017815999686717987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,2047,0.014483200013637542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,2047,0.014521600306034088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,2047,0.014545600116252898
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,2047,0.014519999921321868
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,4095,0.02884959876537323
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,4095,0.028934401273727418
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,4095,0.02067199945449829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,4095,0.03136799931526184
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,4095,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,4095,0.023123200237751006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,4095,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,4095,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,4095,0.02069920003414154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,4095,0.019871999323368073
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,4095,0.016654400527477263
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,4095,0.01505119949579239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,8191,0.04791040122509003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,8191,0.05391680002212525
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,4095,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,8191,0.04968959987163544
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,4095,0.016684800386428833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,8191,0.04525760114192963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,8191,0.04527679979801178
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,8191,0.04408800005912781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,8191,0.04324640035629272
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,8191,0.03303999900817871
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,8191,0.020732800662517547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,8191,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,8191,0.02924000024795532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,8191,0.02072480022907257
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,16383,0.0769536018371582
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,8191,0.02282879948616028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,16383,0.07579839825630189
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,8191,0.028942400217056276
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,16383,0.09386240243911743
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,16383,0.07327520251274108
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,16383,0.07060800194740295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,16383,0.07228000164031982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,16383,0.0705024003982544
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,16383,0.05102880001068115
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,16383,0.04538719952106476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,16383,0.0391072005033493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,16383,0.0392192006111145
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,16383,0.03903039991855621
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,16383,0.0383296012878418
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,16383,0.04970879852771759
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,32767,0.1708847999572754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,32767,0.13985439538955688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,32767,0.13037439584732055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,32767,0.1243664026260376
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,32767,0.12560800313949586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,32767,0.12588160037994384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,32767,0.06844159960746765
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,32767,0.06087999939918518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,32767,0.05971519947052002
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,32767,0.05986559987068176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,32767,0.08671519756317139
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,32767,0.06293280124664306
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,32767,0.12528480291366578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,65535,0.2711008071899414
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,65535,0.24244000911712646
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,32767,0.0862559974193573
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,65535,0.3283776044845581
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,65535,0.22924959659576416
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,65535,0.1608399987220764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,65535,0.2338576078414917
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,65535,0.11911360025405884
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,65535,0.22687039375305176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,65535,0.10638079643249512
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,65535,0.22528479099273682
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,65535,0.10589599609375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,65535,0.10507680177688598
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,131071,0.5218512058258057
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,131071,0.6356751918792725
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,65535,0.15760480165481566
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,131071,0.4241504192352295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,65535,0.10465919971466064
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,131071,0.4608320236206055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,131071,0.4251999855041504
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,131071,0.4238719940185547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,131071,0.42420639991760256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,131071,0.2273952007293701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,131071,0.29930720329284666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,131071,0.18995840549468995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,131071,0.1887503981590271
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,131071,0.1908527970314026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,131071,0.18780959844589235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1,0.014480000734329224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,131071,0.3133951902389526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1,0.01249919980764389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1,0.01263359934091568
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1,0.012587200105190276
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1,0.01249919980764389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1,0.012428800016641617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1,0.010492800176143647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,3,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,3,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,3,0.01252640038728714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,3,0.012417600303888322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,3,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,3,0.012491200119256973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,3,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,3,0.010401599854230881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,3,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,3,0.014529600739479065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,3,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,3,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,3,0.012516799569129943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,3,0.011547199636697768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,7,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,7,0.014591999351978302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,7,0.012438400089740754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,7,0.012548799812793731
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,7,0.012559999525547028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,7,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,7,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,7,0.010710400342941285
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,7,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,7,0.014560000598430633
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,7,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,7,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,7,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,15,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,15,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,7,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,15,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,15,0.012508800625801087
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,15,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,15,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,15,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,15,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,15,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,15,0.010585600137710571
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,15,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,15,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,15,0.010500799864530563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,15,0.012038400024175644
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,31,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,31,0.014556799829006196
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,31,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,31,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,31,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,31,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,31,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,31,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,31,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,31,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,31,0.010499200224876404
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,31,0.012652799487113953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,31,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,63,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,63,0.014535999298095703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,63,0.012511999905109405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,63,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,31,0.01043199971318245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,63,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,63,0.012535999715328216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,63,0.014529600739479065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,63,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,63,0.010516799986362457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,63,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,63,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,63,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,63,0.010451199859380722
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,63,0.010390400141477584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,127,0.014528000354766845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,127,0.012912000715732574
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,127,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,127,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,127,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,127,0.012563200294971466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,127,0.012590399384498597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,127,0.012649600207805634
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,127,0.012505599856376648
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,127,0.012503999471664428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,127,0.010502400249242783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,127,0.010480000078678131
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,127,0.010520000010728836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,255,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,255,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,255,0.012558400630950928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,127,0.010491199791431427
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,255,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,255,0.01247519999742508
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,255,0.012415999919176102
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,255,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,255,0.011451199650764465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,255,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,255,0.012507200241088867
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,255,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,255,0.01040000021457672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,511,0.02077919989824295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,255,0.01082720011472702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,255,0.010532800108194351
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,511,0.018585599958896637
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,511,0.01656640022993088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,511,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,511,0.01451839953660965
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,511,0.01562879979610443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,511,0.01462240070104599
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,511,0.01249760016798973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,511,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,511,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,511,0.012432000041007996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,511,0.014635199308395385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,511,0.018691200017929076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,511,0.012531200051307678
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1023,0.022840000689029694
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1023,0.024855999648571013
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1023,0.01889120042324066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1023,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1023,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1023,0.021316799521446227
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1023,0.016641600430011748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1023,0.016548800468444824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1023,0.016622400283813475
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1023,0.013806399703025819
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1023,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1023,0.012639999389648438
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1023,0.018771199882030486
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,2047,0.02765600085258484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,2047,0.02028000056743622
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,2047,0.020735999941825865
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,2047,0.019196799397468566
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,2047,0.030985599756240843
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,2047,0.03087199926376343
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,2047,0.02476799935102463
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1023,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,2047,0.02075359970331192
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,2047,0.016516800224781036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,2047,0.020521600544452668
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,2047,0.020692799985408784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,2047,0.015196800231933594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,2047,0.014585599303245544
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,2047,0.014609600603580474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,4095,0.049236801266670224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,4095,0.05502560138702393
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,4095,0.04127199947834015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,4095,0.04973919987678528
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,4095,0.04239520132541656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,4095,0.03505919873714447
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,4095,0.026897600293159483
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,4095,0.030563199520111085
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,4095,0.04110879898071289
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,4095,0.020660799741744996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,4095,0.018904000520706177
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,4095,0.041203200817108154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,8191,0.07892320156097413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,4095,0.01956160068511963
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,4095,0.018751999735832213
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,8191,0.07610080242156983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,8191,0.07009279727935791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,8191,0.09505280256271362
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,8191,0.0699887990951538
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,8191,0.0686464011669159
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,8191,0.05114719867706299
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,8191,0.03732640147209167
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,8191,0.0520576000213623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,8191,0.03700479865074158
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,8191,0.06872000098228455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,8191,0.03704800009727478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,8191,0.045291200280189514
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,8191,0.036820799112319946
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,16383,0.14141759872436524
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,16383,0.12984639406204224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,16383,0.17293440103530883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,16383,0.1229904055595398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,16383,0.12380160093307495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,16383,0.0869920015335083
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,16383,0.07009119987487793
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,16383,0.12134720087051391
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,16383,0.1214400053024292
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,16383,0.08594560027122497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,16383,0.05968480110168457
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,16383,0.06058719754219055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,16383,0.059540802240371705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,16383,0.05902720093727112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,32767,0.3293776035308838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,32767,0.2650576114654541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,32767,0.24142560958862305
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,32767,0.23023359775543212
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,32767,0.22867839336395263
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,32767,0.1629439949989319
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,32767,0.12257920503616333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,32767,0.22861599922180176
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,32767,0.22801439762115477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,32767,0.10509599447250366
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,32767,0.10497920513153076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,32767,0.15595680475234985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,32767,0.1061743974685669
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,32767,0.10464160442352295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,65535,0.5254479885101319
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,65535,0.46949119567871095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,65535,0.6386415958404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,65535,0.4446000099182129
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,65535,0.4404655933380127
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,65535,0.3147504091262817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,65535,0.4378367900848389
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,65535,0.2318592071533203
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,65535,0.43968639373779295
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,65535,0.195905601978302
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,65535,0.2993599891662598
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,65535,0.1949295997619629
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,65535,0.19525599479675293
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,65535,0.19459359645843505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1,0.014060799777507783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1,0.015859200060367583
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1,0.01626719981431961
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1,0.01255040019750595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1,0.012622399628162384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1,0.010435199737548828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,3,0.016502399742603303
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,3,0.014619199931621552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,3,0.012465599924325943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,3,0.01284160017967224
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,3,0.01241919994354248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,3,0.012478400021791458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,3,0.012467200309038163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,3,0.012579199671745301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,3,0.012534399330615998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,3,0.016420799493789672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,3,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,3,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,3,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,3,0.010524799674749374
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,7,0.01652960032224655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,7,0.014472000300884247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,7,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,7,0.014577600359916686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,7,0.012547199428081513
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,7,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,7,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,7,0.016459199786186218
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,7,0.012484800070524216
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,7,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,15,0.016550399363040924
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,15,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,7,0.010467199981212616
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,7,0.012462399899959564
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,15,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,7,0.01080159991979599
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,7,0.010468800365924836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,15,0.01297599971294403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,15,0.012537600100040435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,15,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,15,0.01255200058221817
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,15,0.010496000200510025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,15,0.016571199893951415
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,15,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,15,0.012593600153923034
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,15,0.012495999783277511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,15,0.010419200360774993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,15,0.010416000336408614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,31,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,31,0.01592320054769516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,31,0.012464000284671784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,31,0.014572800695896148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,31,0.013686400651931763
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,31,0.012540799379348756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,31,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,31,0.012566399574279786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,31,0.012558400630950928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,31,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,31,0.012435200065374375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,31,0.01043040007352829
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,31,0.01043360009789467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,31,0.010862399637699128
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,63,0.014500799775123595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,63,0.014552000164985656
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,63,0.012544000148773193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,63,0.016607999801635742
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,63,0.012532800436019897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,63,0.012454400211572647
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,63,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,63,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,63,0.012539200484752655
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,63,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,63,0.010475199669599533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,63,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,63,0.010518400371074677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,63,0.01072160005569458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,127,0.016451199352741242
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,127,0.014476799964904785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,127,0.012694400548934937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,127,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,127,0.012572799623012543
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,127,0.014575999975204468
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,127,0.0125231996178627
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,127,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,127,0.01250240057706833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,127,0.012470400333404541
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,127,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,127,0.010427200049161912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,255,0.014608000218868256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,127,0.010422399640083313
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,127,0.01050880029797554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,255,0.013545599579811097
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,255,0.016649599373340606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,255,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,255,0.01252480000257492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,255,0.012563200294971466
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,255,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,255,0.016531200706958772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,255,0.012468799948692322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,255,0.010487999767065048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,255,0.0105103999376297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,255,0.014059199392795563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,511,0.020601600408554077
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,255,0.012452799826860428
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,255,0.01045759990811348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,511,0.022678400576114654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,511,0.01860000044107437
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,511,0.01587360054254532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,511,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,511,0.014886400103569031
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,511,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,511,0.01650400012731552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,511,0.020721599459648132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,511,0.012721599638462066
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,511,0.018638400733470915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,511,0.012481600046157837
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,511,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,511,0.01250080019235611
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1023,0.02893120050430298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1023,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1023,0.018624000251293182
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1023,0.022694399952888487
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1023,0.01871040016412735
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1023,0.02892639935016632
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1023,0.031134399771690368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1023,0.018699200451374055
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1023,0.018680000305175783
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1023,0.0227743998169899
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1023,0.016547200083732606
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1023,0.01459839940071106
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1023,0.014595200121402741
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1023,0.014564800262451171
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,2047,0.04742879867553711
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,2047,0.051076799631118774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,2047,0.05361440181732178
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,2047,0.04510720074176788
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,2047,0.04311839938163757
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,2047,0.043064001202583316
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,2047,0.0419295996427536
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,2047,0.020652799308300017
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,2047,0.01860959976911545
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,2047,0.01857919991016388
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,2047,0.018617600202560425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,2047,0.03155840039253235
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,2047,0.0289792001247406
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,2047,0.028832000494003297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,4095,0.07671999931335449
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,4095,0.0779807984828949
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,4095,0.09200639724731445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,4095,0.06928960084915162
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,4095,0.0706991970539093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,4095,0.04735200107097626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,4095,0.06943359971046448
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,4095,0.04814560115337372
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,4095,0.06854400038719177
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,4095,0.03898400068283081
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,4095,0.04949440062046051
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,4095,0.03711679875850678
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,4095,0.03705599904060364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,4095,0.03710240125656128
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,8191,0.16878559589385986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,8191,0.12534400224685668
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,8191,0.12359999418258667
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,8191,0.13946880102157594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,8191,0.12301119565963745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,8191,0.13256479501724244
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,8191,0.12262239456176757
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,8191,0.08279520273208618
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,8191,0.07246080040931702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,8191,0.08483039736747741
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,8191,0.05962399840354919
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,16383,0.27193920612335204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,8191,0.0627776026725769
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,8191,0.05965120196342468
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,8191,0.059539198875427246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,16383,0.2434864044189453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,16383,0.32306880950927735
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,16383,0.2313839912414551
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,16383,0.23054399490356445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,16383,0.23174240589141845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,16383,0.1556048035621643
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,16383,0.10666079521179199
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,16383,0.10504000186920166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,16383,0.22675199508666993
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,16383,0.12439520359039306
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,16383,0.10483520030975342
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,16383,0.10455039739608765
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,16383,0.15740959644317626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,32767,0.5215184211730957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,32767,0.4680528163909912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,32767,0.6300015926361084
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,32767,0.43751039505004885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,32767,0.4382160186767578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,32767,0.4387472152709961
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,32767,0.4365407943725586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,32767,0.23664479255676268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,32767,0.2953504085540771
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,32767,0.1965183973312378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,32767,0.19561439752578735
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,32767,0.305404806137085
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,32767,0.19438400268554687
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1,0.016519999504089354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,32767,0.19548799991607665
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1,0.02279680073261261
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1,0.01451680064201355
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1,0.013499200344085693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1,0.013734400272369385
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1,0.022724799811840057
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1,0.01273919939994812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1,0.012428800016641617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1,0.014444799721240997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1,0.017950400710105896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1,0.016548800468444824
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1,0.011553599685430526
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,3,0.022707200050354003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1,0.012427199631929398
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,3,0.016625599563121797
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,3,0.020732800662517547
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,3,0.012486399710178375
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,3,0.01356479972600937
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,3,0.014507199823856353
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,3,0.022711999714374542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,3,0.01356000006198883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,3,0.015086400508880615
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,3,0.01855839937925339
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,7,0.022756800055503845
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,3,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,3,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,7,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,7,0.02069759964942932
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,3,0.012488000094890594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,3,0.012451200187206269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,7,0.016519999504089354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,7,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,7,0.022686399519443512
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,7,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,7,0.015311999619007111
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,7,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,7,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,7,0.018611200153827667
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,7,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,7,0.012518399953842163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,7,0.012406399846076966
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,15,0.022705599665641785
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,15,0.016633599996566772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,15,0.01451520025730133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,15,0.020657600462436677
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,15,0.022771200537681578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,15,0.014587199687957764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,15,0.016527999937534333
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,15,0.014505599439144135
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,15,0.017393599450588226
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,15,0.01449279934167862
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,15,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,15,0.012444800138473511
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,15,0.012433599680662155
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,31,0.02272319942712784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,31,0.02080959975719452
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,31,0.014472000300884247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,15,0.012556800246238708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,31,0.014526399970054626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,31,0.016648000478744505
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,31,0.014499199390411378
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,31,0.01634240001440048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,31,0.022785599529743194
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,31,0.01449120044708252
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,31,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,31,0.0124719999730587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,31,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,31,0.012489599734544754
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,63,0.022710399329662324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,63,0.016601599752902985
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,63,0.02065120041370392
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,63,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,63,0.014510400593280792
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,31,0.017950400710105896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,63,0.022703999280929567
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,63,0.016571199893951415
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,63,0.01674720048904419
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,63,0.014574399590492249
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,63,0.014494399726390838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,63,0.012460800260305405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,63,0.012441600114107132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,63,0.012483199685811996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,127,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,127,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,127,0.014484800398349762
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,63,0.012455999851226807
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,127,0.01451359987258911
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,127,0.02067520022392273
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,127,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,127,0.014521600306034088
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,127,0.02277279943227768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,127,0.015939199924468996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,127,0.012479999661445617
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,127,0.01242239996790886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,127,0.012439999729394913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,255,0.026895999908447266
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,127,0.01815039962530136
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,127,0.012436799705028534
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,255,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,255,0.018646399676799773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,255,0.014524799585342408
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,255,0.014511999487876893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,255,0.020641599595546723
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,255,0.01448799967765808
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,255,0.017190399765968322
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,255,0.012545600533485413
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,255,0.016646400094032288
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,255,0.012430399656295776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,255,0.024854399263858795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,255,0.012459199875593185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,511,0.02675839960575104
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,511,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,511,0.03728480041027069
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,511,0.018643200397491455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,255,0.012476799637079239
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,511,0.018719999492168425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,511,0.016755199432373045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,511,0.03331199884414673
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,511,0.02895359992980957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,511,0.020707200467586517
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,511,0.021547199785709382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,511,0.014561599493026734
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,511,0.013358399271965027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,511,0.014593599736690522
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,511,0.014502400159835815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1023,0.049132800102233885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1023,0.05557760000228882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1023,0.04304639995098114
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1023,0.05300319790840149
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1023,0.041591998934745786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1023,0.04124000072479248
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1023,0.04121119976043701
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1023,0.02680639922618866
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1023,0.039831998944282535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1023,0.018080000579357148
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1023,0.03205600082874298
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1023,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1023,0.018564799427986146
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,2047,0.08333600163459778
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,2047,0.07575839757919312
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1023,0.018592000007629395
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,2047,0.09631360173225403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,2047,0.06926079988479614
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,2047,0.04545280039310455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,2047,0.0559984028339386
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,2047,0.06791039705276489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,2047,0.06627839803695679
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,2047,0.06815840005874634
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,2047,0.036375999450683594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,2047,0.035257598757743834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,2047,0.037859201431274414
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,2047,0.05209439992904663
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,2047,0.035155200958251955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,4095,0.14736000299453736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,4095,0.12873599529266358
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,4095,0.12127200365066529
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,4095,0.11953760385513305
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,4095,0.17383999824523927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,4095,0.12007679939270019
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,4095,0.08773919939994812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,4095,0.07119680047035218
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,4095,0.0902239978313446
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,4095,0.06037600040435791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,4095,0.12111040353775024
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,4095,0.05915200114250183
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,8191,0.27440481185913085
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,4095,0.05870400071144104
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,8191,0.24041121006011962
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,8191,0.32736160755157473
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,4095,0.059431999921798706
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,8191,0.22819039821624756
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,8191,0.22794721126556397
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,8191,0.22533280849456788
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,8191,0.2272047996520996
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,8191,0.1049232006072998
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,8191,0.12425600290298462
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,8191,0.1605855941772461
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,8191,0.10589920282363892
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,8191,0.10345439910888672
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,8191,0.10399199724197387
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,8191,0.15920959711074828
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,16383,0.4671520233154297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,16383,0.6344624042510987
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,16383,0.4334847927093506
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,16383,0.4353616237640381
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,16383,0.5439248085021973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,16383,0.43392481803894045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,16383,0.43421759605407717
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,16383,0.23489758968353272
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,16383,0.301528000831604
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,16383,0.1941375970840454
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,16383,0.3136336088180542
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,16383,0.1961184024810791
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,16383,0.19449599981307983
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1,0.03996320068836212
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,16383,0.19332000017166137
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1,0.02484479993581772
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1,0.020659199357032774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1,0.03292160034179688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1,0.018697600066661834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1,0.0369951993227005
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1,0.019524799287319185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1,0.026815998554229736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1,0.01899999976158142
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1,0.022838400304317476
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1,0.01571200042963028
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1,0.016433599591255187
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1,0.01655679941177368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,3,0.03917120099067688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,3,0.024750399589538574
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1,0.01796640008687973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,3,0.018697600066661834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,3,0.018755200505256652
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,3,0.02078240066766739
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,3,0.019467200338840484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,3,0.03261440098285675
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,3,0.037062400579452516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,3,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,3,0.018108800053596497
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,3,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,3,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,3,0.026283198595047
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,7,0.031092798709869383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,3,0.01653600037097931
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,7,0.02483839988708496
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,7,0.020656000077724456
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,7,0.039996799826622007
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,7,0.020470400154590607
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,7,0.019731199741363524
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,7,0.037041598558425905
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,7,0.02688960134983063
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,7,0.02316640019416809
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,7,0.018299199640750885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,7,0.0186271995306015
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,7,0.0165120005607605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,7,0.016551999747753142
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,15,0.04115839898586273
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,7,0.01642719954252243
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,15,0.020670400559902193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,15,0.024851199984550477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,15,0.03303520083427429
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,15,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,15,0.0187376007437706
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,15,0.020689600706100465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,15,0.026844799518585205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,15,0.018729600310325622
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,15,0.016492800414562227
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,15,0.022804799675941467
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,15,0.01656000018119812
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,15,0.03708640038967133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,31,0.02475679963827133
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,15,0.01459999978542328
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,31,0.041089600324630736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,31,0.02080159932374954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,31,0.03301439881324768
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,31,0.02073120027780533
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,31,0.01918399930000305
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,31,0.026840001344680786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,31,0.019838400185108185
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,31,0.03700799942016601
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,31,0.01871200054883957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,31,0.023497599363327026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,31,0.01587360054254532
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,31,0.01664000004529953
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,63,0.0411871999502182
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,31,0.014920000731945039
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,63,0.018911999464035035
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,63,0.02478879988193512
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,63,0.021400000154972076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,63,0.03291679918766022
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,63,0.02016959935426712
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,63,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,63,0.036976000666618346
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,63,0.026782399415969847
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,63,0.018236799538135527
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,63,0.02380799949169159
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,63,0.015166400372982025
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,63,0.016631999611854555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,127,0.026793599128723145
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,63,0.016145600378513335
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,127,0.032411199808120725
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,127,0.041177600622177124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,127,0.020824000239372253
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,127,0.020729599893093108
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,127,0.03799999952316284
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,127,0.02056639939546585
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,127,0.020628799498081208
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,127,0.02674719989299774
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,127,0.023217600584030152
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,127,0.016684800386428833
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,127,0.016574400663375854
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,127,0.018588800728321076
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,127,0.016575999557971954
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,255,0.03705599904060364
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,255,0.04802559912204742
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,255,0.0370959997177124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,255,0.02265920042991638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,255,0.020718400180339814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,255,0.02072640061378479
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,255,0.042840000987052915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,255,0.026907199621200563
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,255,0.020638400316238405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,255,0.02690559923648834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,255,0.016497600078582763
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,255,0.01650400012731552
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,255,0.01857759952545166
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,255,0.01650719940662384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,511,0.06373440027236939
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,511,0.053681600093841556
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,511,0.042396798729896545
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,511,0.04129759967327118
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,511,0.03705120086669922
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,511,0.044284799695014955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,511,0.03903999924659729
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,511,0.051571202278137204
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,511,0.04116959869861603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,511,0.060356801748275755
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,511,0.02268960028886795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,511,0.020750400424003602
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,511,0.020156799256801604
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1023,0.09402400255203247
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1023,0.08388800024986268
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,511,0.020761600136756896
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1023,0.09790239930152893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1023,0.06879199743270874
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1023,0.06894239783287048
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1023,0.06731839776039124
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1023,0.06723039746284484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1023,0.052172797918319705
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1023,0.07111679911613464
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1023,0.05667359828948974
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1023,0.03723999857902527
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1023,0.03716320097446442
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1023,0.039164799451828006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1023,0.037206399440765384
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,2047,0.15853279829025269
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,2047,0.13810559511184692
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,2047,0.1214176058769226
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,2047,0.12010879516601562
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,2047,0.09999520182609559
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,2047,0.08015199899673461
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,2047,0.12250560522079468
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,2047,0.12090879678726196
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,2047,0.17647839784622193
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,2047,0.09219520092010498
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,2047,0.0644591987133026
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,2047,0.06168320178985596
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,2047,0.061179202795028684
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,2047,0.06035360097885132
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,4095,0.2497472047805786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,4095,0.2840800046920776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,4095,0.2235663890838623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,4095,0.2285072088241577
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,4095,0.22319839000701905
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,4095,0.22236320972442628
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,4095,0.17501599788665773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,4095,0.330076789855957
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,4095,0.16399040222167968
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,4095,0.10751039981842041
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,4095,0.11009279489517212
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,4095,0.10738240480422974
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,4095,0.10641119480133057
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,4095,0.13456640243530274
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,8191,0.4757199764251709
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,8191,0.43358559608459474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,8191,0.43579678535461425
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,8191,0.6300352096557618
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,8191,0.5469295978546143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,8191,0.4310624122619629
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,8191,0.4294095993041992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,8191,0.24459519386291503
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,8191,0.2013456106185913
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,8191,0.19845759868621826
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,8191,0.19599519968032836
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,8191,0.3250799894332886
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1,0.07050880193710327
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,8191,0.19693759679794312
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,8191,0.30511519908905027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1,0.044649600982666016
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1,0.05312640070915222
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1,0.03295199871063233
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1,0.02903519868850708
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1,0.030025601387023926
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1,0.044889599084854126
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1,0.026815998554229736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1,0.029180800914764403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1,0.024769599735736846
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1,0.022759999334812164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1,0.06731520295143127
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1,0.02300799936056137
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1,0.03918560147285462
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,3,0.06917759776115417
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,3,0.04530879855155945
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,3,0.033032000064849854
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,3,0.029147198796272276
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,3,0.030900800228118898
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,3,0.053523200750350955
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,3,0.03908959925174713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,3,0.043751999735832214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,3,0.030083200335502623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,3,0.026966398954391478
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,3,0.02287199944257736
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,3,0.06620320081710815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,3,0.02327519953250885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,7,0.06980320215225219
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,7,0.045393601059913635
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,7,0.02956799864768982
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,3,0.022707200050354003
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,7,0.053825598955154416
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,7,0.06765440106391907
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,7,0.030956798791885377
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,7,0.03914560079574585
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,7,0.030888000130653383
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,7,0.03296639919281006
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,7,0.044495999813079834
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,7,0.022742399573326112
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,15,0.06957119703292847
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,7,0.026902401447296144
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,15,0.0329263985157013
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,15,0.04548960030078888
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,7,0.02479359954595566
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,15,0.029313600063323973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,15,0.053830397129058835
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,7,0.022908799350261688
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,15,0.02946079969406128
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,15,0.06775839924812317
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,15,0.0391072005033493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,15,0.030939200520515443
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,15,0.026872000098228453
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,15,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,31,0.06894559860229492
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,15,0.045203199982643126
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,15,0.02465600073337555
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,15,0.022767999768257143
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,31,0.03300639986991882
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,31,0.047142401337623596
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,31,0.02913280129432678
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,31,0.05475199818611145
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,31,0.029455998539924623
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,31,0.06711199879646301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,31,0.04470880031585693
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,31,0.030964800715446474
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,31,0.02696000039577484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,31,0.023291200399398804
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,31,0.022720000147819518
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,31,0.03984639942646027
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,31,0.023127999901771546
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,63,0.04695200026035309
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,63,0.06991519927978515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,63,0.02935360074043274
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,63,0.03296799957752228
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,63,0.029481598734855653
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,63,0.04112319946289063
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,63,0.05648959875106811
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,63,0.030988800525665283
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,63,0.044414401054382324
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,63,0.06785600185394287
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,63,0.026790401339530943
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,63,0.022870400547981264
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,63,0.02280000001192093
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,127,0.036692801117897036
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,127,0.0494271993637085
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,127,0.031071999669075014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,127,0.030969598889350893
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,63,0.023500800132751465
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,127,0.05789600014686584
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,127,0.02948960065841675
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,127,0.07039039731025695
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,127,0.04552479982376099
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,127,0.06784960031509399
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,127,0.02686559855937958
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,127,0.04325119853019714
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,127,0.022782400250434875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,127,0.02274879962205887
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,127,0.02452320009469986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,255,0.08419839739799499
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,255,0.06585599780082703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,255,0.06219519972801209
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,255,0.04645920097827912
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,255,0.04311679899692535
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,255,0.043268799781799316
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,255,0.04778240025043488
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,255,0.04140959978103638
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,255,0.022912000119686127
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,255,0.02351039946079254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,255,0.028944000601768494
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,255,0.022777600586414336
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,255,0.05146719813346863
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,255,0.07342399954795838
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,511,0.1114351987838745
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,511,0.09869279861450195
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,511,0.07365919947624207
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,511,0.07046239972114562
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,511,0.0957647979259491
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,511,0.07028639912605286
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,511,0.07490079998970031
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,511,0.0625216007232666
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,511,0.04377439916133881
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,511,0.08804320096969605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,511,0.04019359946250915
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,511,0.06574879884719849
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,511,0.03907999992370605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1023,0.14957120418548583
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1023,0.17042399644851686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,511,0.03928000032901764
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1023,0.12309759855270386
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1023,0.1736624002456665
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1023,0.11643199920654297
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1023,0.12830560207366942
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1023,0.09112640023231507
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1023,0.12167680263519287
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1023,0.12121599912643433
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1023,0.09569439888000489
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1023,0.06810879707336426
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1023,0.06264960169792175
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,2047,0.29695680141448977
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1023,0.06373760104179382
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,2047,0.2644416093826294
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1023,0.06218079924583435
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,2047,0.22761919498443603
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,2047,0.31228959560394287
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,2047,0.22477920055389405
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,2047,0.23098719120025635
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,2047,0.22377440929412842
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,2047,0.11585279703140258
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,2047,0.1882159948348999
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,2047,0.14739680290222168
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,2047,0.11108319759368897
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,2047,0.10939359664916992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,2047,0.16193920373916626
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,2047,0.10872479677200317
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,4095,0.5398704051971436
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,4095,0.4913440227508545
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,4095,0.4328479766845703
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,4095,0.4323440074920654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,4095,0.4401408195495605
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,4095,0.5851344108581543
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,4095,0.3267519950866699
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,4095,0.25803680419921876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,4095,0.43188161849975587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,4095,0.2937391996383667
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,4095,0.20872321128845214
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,4095,0.20309441089630126
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,4095,0.2010335922241211
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1,0.07835360169410706
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1,0.12767360210418702
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1,0.0548911988735199
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,4095,0.19959360361099243
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1,0.10228960514068604
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1,0.04955680072307587
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1,0.049409601092338565
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1,0.1230623960494995
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1,0.08029119968414307
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1,0.07219200134277344
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1,0.04859519898891449
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1,0.03714239895343781
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1,0.03908640146255493
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1,0.0453247994184494
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1,0.03704639971256256
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,3,0.12725919485092163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,3,0.07931839823722839
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,3,0.10035359859466553
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,3,0.05472639799118042
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,3,0.049353599548339844
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,3,0.12329280376434326
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,3,0.07244319915771484
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,3,0.04940640032291412
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,3,0.04927360117435455
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,3,0.08043839931488037
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,3,0.039099198579788205
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,3,0.03710559904575348
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,7,0.1273344039916992
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,3,0.04541279971599579
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,3,0.03703519999980927
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,7,0.07839199900627136
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,7,0.1006767988204956
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,7,0.04952000081539154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,7,0.049246400594711304
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,7,0.049604800343513486
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,7,0.05556480288505554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,7,0.07357599735260009
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,7,0.1230847954750061
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,7,0.08043360114097595
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,7,0.03710080087184906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,7,0.039062398672103885
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,7,0.03733600080013275
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,7,0.045756798982620236
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,15,0.07901279926300049
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,15,0.12887680530548096
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,15,0.1016111969947815
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,15,0.05142719745635986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,15,0.04938879907131195
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,15,0.049748799204826354
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,15,0.0739296019077301
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,15,0.05756319761276245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,15,0.1230847954750061
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,15,0.08148000240325928
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,15,0.039175999164581296
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,15,0.03712959885597229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,15,0.037062400579452516
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,15,0.0455808013677597
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,31,0.12904959917068481
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,31,0.07862079739570618
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,31,0.05140159726142883
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,31,0.05707839727401733
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,31,0.10097600221633911
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,31,0.04954879879951477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,31,0.049326398968696596
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,31,0.07396479845046997
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,31,0.12319999933242798
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,31,0.046132799983024594
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,31,0.03916319906711578
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,31,0.08262240290641784
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,63,0.07971519827842713
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,31,0.03712959885597229
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,63,0.12926559448242186
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,63,0.0595088005065918
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,63,0.10109920501708984
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,31,0.03710080087184906
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,63,0.04939360022544861
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,63,0.12311999797821045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,63,0.07390879988670349
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,63,0.053388798236846925
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,63,0.050913602113723755
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,63,0.08336960077285767
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,63,0.04657120108604431
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,63,0.03713279962539673
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,63,0.03705280125141144
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,127,0.12862080335617065
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,127,0.06528159976005554
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,127,0.08870559930801392
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,63,0.03912799954414368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,127,0.10513119697570801
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,127,0.055955201387405396
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,127,0.057571202516555786
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,127,0.05517920255661011
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,127,0.12331039905548095
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,127,0.07416319847106934
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,127,0.0828432023525238
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,127,0.03803519904613495
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,127,0.037136000394821164
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,127,0.04110080003738403
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,127,0.04940800070762634
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,255,0.11050879955291748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,255,0.1525424003601074
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,255,0.11365599632263183
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,255,0.07299519777297973
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,255,0.07801920175552368
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,255,0.06948480010032654
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,255,0.0870688021183014
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,255,0.08103839755058288
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,255,0.05129759907722473
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,255,0.06863359808921814
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,255,0.04469920098781586
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,255,0.13013919591903686
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,511,0.2117919921875
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,255,0.041203200817108154
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,511,0.16666239500045776
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,255,0.04210880100727081
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,511,0.1301408052444458
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,511,0.18558239936828613
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,511,0.12053120136260986
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,511,0.16038880348205567
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,511,0.12200000286102294
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,511,0.10917439460754394
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,511,0.07232159972190857
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,511,0.11210720539093018
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,511,0.06626880168914795
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,511,0.06372159719467163
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,511,0.12091519832611083
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,511,0.0628271996974945
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1023,0.27328479290008545
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1023,0.3281872034072876
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1023,0.3423295974731445
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1023,0.21576480865478515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1023,0.22663679122924804
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1023,0.21770238876342773
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1023,0.22977280616760254
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1023,0.1625375986099243
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1023,0.17581119537353515
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1023,0.21470561027526855
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1023,0.11079360246658325
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1023,0.11782560348510743
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1023,0.10597599744796753
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,2047,0.5937920093536377
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,2047,0.6084080219268799
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,2047,0.49152960777282717
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,2047,0.41449599266052245
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1023,0.10730400085449218
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,2047,0.40958080291748045
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,2047,0.4224463939666748
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,2047,0.4106592178344727
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,2047,0.27211999893188477
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,2047,0.37563040256500246
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,2047,0.30832159519195557
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,2047,0.20734241008758544
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,2047,0.19517760276794432
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,2047,0.1991104006767273
SGLang,0.5.10,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,2047,0.19305919408798217
