framework,version,device,op_name,kernel_source,bmm_dtype,num_tokens,num_heads,latency
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,128,0.006723199784755707
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,128,0.0088128000497818
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,64,0.006668800115585327
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,32,0.004502400010824204
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,32,0.006748799979686737
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,16,0.004399999976158142
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,64,0.004412800073623657
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,16,0.0067552000284194945
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,8,0.004233599826693535
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,8,0.006742399930953979
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,4,0.004131200164556504
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,4,0.006415999680757523
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,2,0.003824000060558319
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,2,0.006752000004053116
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1,1,0.004303999990224838
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1,1,0.006617599725723266
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,128,0.007356800138950348
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,64,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,32,0.004105599969625473
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,128,0.009507200121879578
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,16,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,16,0.006489600241184235
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,8,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,8,0.006585600227117539
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,4,0.004425600171089172
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,4,0.0064640000462532045
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,2,0.004441599920392036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,2,0.006623999774456024
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,64,0.004604800045490265
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,64,0.0075392000377178196
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,32,0.004361600056290627
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,32,0.0065760001540184024
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,16,0.004204799979925155
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,16,0.006620799750089645
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,8,0.004383999854326248
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,8,0.006684800237417221
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,4,0.006672000139951706
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,2,0.0044064000248909
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,2,0.006595200300216675
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,1,0.0043935999274253845
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,1,0.006492800265550614
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,128,0.0066143997013568875
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,128,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,64,0.004371200129389763
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,64,0.008499199897050858
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,32,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,32,0.006623999774456024
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,16,0.0044511999934911724
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,16,0.006662400066852569
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,8,0.0044319998472929
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,8,0.0070431999862194065
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,4,0.004287999868392944
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,4,0.006579200178384781
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,2,0.0044096000492572784
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,2,0.006646399945020675
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8,1,0.004284799844026565
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8,1,0.006627199798822403
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,128,0.006598400324583054
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,128,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,64,0.004470400139689445
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,64,0.00867839977145195
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,32,0.005590400099754334
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,32,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2,1,0.0040640000253915785
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,16,0.004287999868392944
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,128,0.006543999910354615
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,8,0.004467200115323066
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,4,0.004470400139689445
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,4,0.006646399945020675
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,2,0.004371200129389763
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,2,0.006563200056552887
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,16,1,0.004188799858093261
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,1,0.006470400094985962
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,128,0.006540799885988236
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4,4,0.004518400132656098
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,128,0.014796799421310425
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,64,0.004665600135922432
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,64,0.01050880029797554
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,32,0.004422400146722794
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,16,0.0042559999972581865
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,32,0.008495999872684479
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,8,0.004441599920392036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,8,0.008054400235414505
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,4,0.004495999962091446
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,4,0.007097599655389785
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,2,0.004399999976158142
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,64,0.006758400052785873
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,2,0.006639999896287918
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,1,0.006665600091218948
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,128,0.008406399935483932
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,128,0.018838399648666383
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,64,0.004633599892258644
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,64,0.012591999769210816
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,32,0.0044511999934911724
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,32,0.008687999844551087
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,32,0.0066592000424861904
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,16,0.00445760004222393
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,8,0.004281599819660187
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,16,0.008534400165081025
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,8,0.007462400197982788
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,4,0.004377600178122521
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,4,0.0067103996872901915
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,2,0.004169600084424019
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,2,0.007212799787521362
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,48,1,0.004403200000524521
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,48,1,0.006620799750089645
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,128,0.006447999924421311
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,128,0.01892800033092499
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,64,0.0046016000211238865
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,64,0.012838399410247803
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,32,0.004473600164055824
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,32,0.01051200032234192
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,16,0.004508800059556961
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,16,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,8,0.004460800066590309
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,8,0.007273600250482559
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,4,0.004320000112056732
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,4,0.006723199784755707
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,2,0.004300799965858459
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,2,0.006672000139951706
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,64,1,0.004112000018358231
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,64,1,0.0066431999206542965
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,32,1,0.004422400146722794
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,128,0.006675200164318084
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,64,0.006451199948787689
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,128,0.022844800353050233
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,64,0.014608000218868256
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,32,0.004441599920392036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,32,0.010716799646615982
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,16,0.004374400153756142
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,16,0.008806400001049042
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,8,0.00856959968805313
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,4,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,4,0.006764800101518631
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,2,0.0042015999555587765
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,2,0.006537599861621857
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,1,0.004380799829959869
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,80,1,0.00663359984755516
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2,1,0.006761600077152252
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,128,0.006684800237417221
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,64,0.00650240033864975
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,128,0.02500480115413666
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,64,0.016732800006866454
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,32,0.01096000000834465
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,16,0.004524800181388855
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,16,0.008953599631786347
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,8,0.0044319998472929
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,8,0.008537600189447403
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,4,0.004502400010824204
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,4,0.006588800251483918
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,2,0.004390399903059006
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,2,0.008470399677753449
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,1,0.004467200115323066
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,96,1,0.007481600344181061
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,128,0.008515200018882752
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,128,0.031071999669075014
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,64,0.006611199676990509
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,64,0.018745599687099455
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,32,0.004521600157022476
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,32,0.01279360055923462
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,16,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,8,0.004364800080657006
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,8,0.008502399921417237
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,4,0.004524800181388855
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,4,0.007715199887752533
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,2,0.0037728000432252886
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,2,0.007212799787521362
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,1,0.0042559999972581865
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,128,1,0.0065151996910572055
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,80,8,0.004361600056290627
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,128,0.00854720026254654
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,64,0.006656000018119812
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,64,0.02104640007019043
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,32,0.006483200192451477
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,32,0.01462399959564209
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,16,0.004428799822926521
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,16,0.01055999994277954
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,8,0.004278400167822838
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,96,32,0.00451200008392334
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4,128,0.010553599894046783
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,8,0.00852160006761551
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,4,0.004441599920392036
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,2,0.004464000090956688
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,160,1,0.004358400031924248
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,4,0.008499199897050858
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,2,0.009244800359010697
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,16,0.006854400038719177
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,1,0.007228799909353256
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,128,0.04132480025291443
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,64,0.02500480115413666
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,32,0.006319999694824219
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,32,0.016697600483894348
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,16,0.00451200008392334
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,16,0.010713600367307664
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,8,0.008739200234413148
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,4,0.004323200136423111
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,4,0.008511999994516373
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,2,0.004470400139689445
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,2,0.006656000018119812
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,1,0.004281599819660187
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,128,16,0.004374400153756142
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,192,1,0.006780800223350525
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,128,0.01072319969534874
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,128,0.051692801713943484
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,64,0.008463999629020691
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,64,0.02922239899635315
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,32,0.006550399959087372
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,32,0.01889919936656952
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,160,128,0.03711360096931458
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,16,0.00459199994802475
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,8,0.0045056000351905824
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,8,0.010579200088977813
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,4,0.004374400153756142
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,4,0.0084927998483181
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,2,0.0042304001748561856
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,2,0.008444800227880477
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,256,1,0.004204799979925155
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,1,0.006617599725723266
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,128,0.012639999389648438
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,128,0.06371840238571166
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,16,8,0.0066143997013568875
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,64,0.008665599673986436
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,32,0.006595200300216675
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,64,0.006617599725723266
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,128,0.009875199943780898
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,64,0.03664959967136383
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,16,0.005603199824690819
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,32,0.02096319943666458
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,16,0.014812800288200378
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,4,0.004502400010824204
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,8,0.01064639985561371
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,4,0.00865280032157898
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,2,0.0044511999934911724
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,2,0.008668799698352814
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,320,1,0.008486399799585343
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,128,0.014742399752140044
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,64,0.010684800148010255
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,64,0.04140479862689972
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,32,0.006627199798822403
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,32,0.024911999702453613
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,16,0.006307200342416763
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,16,0.016755199432373045
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,8,0.00488319993019104
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,8,0.01194240003824234
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,4,0.004518400132656098
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,4,0.00910080000758171
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,2,0.004527999833226204
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,2,0.008614400029182434
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,384,1,0.004396799951791763
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,256,16,0.012783999741077422
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,1,0.00852160006761551
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,128,0.01876160055398941
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,64,0.01263359934091568
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,128,0.09753919839859009
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,32,0.008460800349712371
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,64,0.051798397302627565
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,16,0.006454399973154068
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,32,0.029414400458335876
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,16,0.018886399269104005
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,8,0.00461760014295578
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,4,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,4,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,2,0.004377600178122521
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,2,0.008703999966382981
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,512,1,0.005811199918389321
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,128,0.027209600806236266
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,32,16,0.008601599931716919
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,128,0.14198399782180787
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,64,0.014838400483131408
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,8,0.0043680001050233844
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,32,0.010678400099277497
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,16,0.0067552000284194945
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,192,8,0.004550400003790855
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,32,0.042710399627685545
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,320,1,0.004067200049757957
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,8,0.0065311998128890995
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,16,0.025110399723052977
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,4,0.004761600121855736
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,2,0.004355200007557869
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,4,0.012585599720478059
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,2,0.01064639985561371
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,384,128,0.073990398645401
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,1,0.008524800091981888
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,128,0.03521920144557953
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,64,0.018783999979496
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,128,0.18906879425048828
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,32,0.01053439974784851
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,64,0.09688000082969665
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,32,0.051577597856521606
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,16,0.008463999629020691
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,8,0.006454399973154068
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,8,0.018748800456523895
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,4,0.004691199958324432
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,2,0.004473600164055824
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,4,0.01286720037460327
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1024,1,0.004387199878692627
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,2,0.010694400221109391
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,1,0.00854720026254654
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,128,0.04752320051193237
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,64,0.027075201272964478
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,128,0.2790816068649292
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,64,0.14185919761657714
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,32,0.014703999459743499
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,32,0.07407680153846741
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,16,0.01061440035700798
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,8,0.012751999497413635
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,16,0.041340801119804385
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,8,0.006729599833488464
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,4,0.006496000289916992
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,512,1,0.008585599809885025
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,4,0.016784000396728515
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,2,0.012489599734544754
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,1,0.004383999854326248
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,1,0.009062399715185165
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,64,0.07420799732208253
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,128,0.061900800466537474
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,64,0.03399359881877899
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,768,8,0.016659200191497803
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,128,0.3694623947143555
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,64,0.18887679576873778
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,768,1,0.004345599934458733
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,32,0.0972927987575531
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,16,0.010764800012111664
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,8,0.008723200112581254
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,16,0.05167999863624573
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,8,0.030608001351356506
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,4,0.006294400244951248
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,2,0.0044319998472929
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,4,0.01884479969739914
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,1,0.004454400017857551
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,2,0.01268800050020218
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,2048,1,0.01066880002617836
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1024,16,0.03099200129508972
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,64,0.04650560021400452
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,128,0.5507232189178467
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,32,0.02620159983634949
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,64,0.27971200942993163
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,32,0.14177279472351073
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,16,0.07400000095367432
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,8,0.010655999928712846
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,4,0.006550399959087372
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,8,0.041484799981117246
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,4,0.025094398856163026
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,2,0.006425599753856659
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,2,0.016771200299263
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,1,0.004566400125622749
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,128,0.11353919506072999
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,1536,8,0.024963200092315674
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,1536,2,0.004649600014090538
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,64,0.06115840077400207
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,32,0.03341760039329529
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,64,0.36964800357818606
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,32,0.1886847972869873
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,16,0.018320000171661376
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,16,0.09650560021400452
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,8,0.051737600564956666
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,4,0.008534400165081025
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,2048,32,0.018723200261592864
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,4,0.030985599756240843
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,2,0.006444799900054932
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,2,0.018828800320625304
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,1,0.004636799916625023
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,128,0.16500480175018312
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,128,0.08771520256996154
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,64,0.08713600039482117
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,32,0.04655359983444214
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,64,0.550387191772461
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,32,0.2769023895263672
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,16,0.025084799528121947
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,128,1.1149984359741212
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,8,0.014575999975204468
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,3072,1,0.011049599945545196
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,4,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,2,0.006592000275850296
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,4,0.04142400026321411
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,2,0.02507199943065643
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,6144,1,0.006521599739789963
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,1,0.016681599617004394
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,128,0.21526720523834228
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,64,0.1137503981590271
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,128,0.7335999965667724
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,32,0.061343997716903687
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,128,1.485212802886963
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,64,0.7337664127349853
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,16,0.03402239978313446
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,16,0.18886719942092894
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,8,0.018668800592422485
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,4,0.01098880022764206
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,4096,8,0.0106175996363163
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,2,0.00865280032157898
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,2,0.03102400004863739
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,8192,1,0.006428799778223038
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,1,0.018745599687099455
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,4096,1,0.012777599692344665
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,float16,3072,16,0.01483519971370697
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,16,0.1398303985595703
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,6144,8,0.07401599884033203
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,32,0.3702944040298462
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,8,0.09662079811096191
SGLang,0.5.9,NVIDIA B200,mla_gen_pre,default,fp8,8192,4,0.0516543984413147
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,128,0.00461760014295578
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,128,0.007145600020885467
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,64,0.006694400310516357
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,32,0.004694399982690811
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,32,0.007500799745321274
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,16,0.004636799916625023
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,16,0.008550400286912918
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,8,0.004633599892258644
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,8,0.008684799820184708
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,4,0.004915200173854828
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,4,0.006649599969387054
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,2,0.004556800052523613
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,2,0.006678400188684463
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,1,0.00432640016078949
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1,1,0.006569600105285645
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,128,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,128,0.008604799956083297
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1,64,0.00448639988899231
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,64,0.0045056000351905824
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,32,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,32,0.008563199639320373
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,16,0.008528000116348267
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,8,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,8,0.007241600006818771
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,4,0.004476799815893173
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,4,0.006678400188684463
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,2,0.004262400045990944
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,2,0.006534399837255478
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,1,0.004473600164055824
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,1,0.006543999910354615
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,128,0.004595199972391129
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,128,0.008502399921417237
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,64,0.008303999900817871
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,32,0.0046431999653577805
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,32,0.007635200023651123
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,16,0.004527999833226204
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2,64,0.006700800359249115
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,8,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,16,0.008531200140714646
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,4,0.004499199986457825
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,8,0.006623999774456024
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,4,0.006694400310516357
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,2,0.00445760004222393
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,2,0.006486400216817856
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4,1,0.0066143997013568875
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,128,0.004508800059556961
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,128,0.00870719999074936
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,64,0.004387199878692627
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,64,0.008457600325345992
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,32,0.004444799944758415
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,16,0.004636799916625023
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,8,0.004467200115323066
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,4,0.007497599720954895
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,2,0.0043519999831914905
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,2,0.006569600105285645
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,1,0.004374400153756142
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,1,0.007075200229883194
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,128,0.006294400244951248
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,128,0.010531199723482132
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,64,0.004566400125622749
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,64,0.008524800091981888
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,32,0.004534399881958961
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,32,0.008553600311279297
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,16,0.0044319998472929
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,64,0.004633599892258644
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,16,0.008537600189447403
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,8,0.004473600164055824
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4,1,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,4,0.004476799815893173
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,2,0.004236799851059914
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,4,0.008419200032949447
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,2,0.006681600213050842
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2,16,0.004540799930691719
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,1,0.006668800115585327
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,128,0.006451199948787689
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,128,0.012831999361515046
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,64,0.004377600178122521
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,64,0.010576000064611435
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,32,0.004460800066590309
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,32,0.008675199747085572
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,16,0.004454400017857551
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,16,0.00851840004324913
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,8,0.004460800066590309
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,8,0.008614400029182434
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,4,0.004425600171089172
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,2,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8,4,0.004419200122356415
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,32,1,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16,1,0.004387199878692627
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,128,0.006400000303983688
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,128,0.016646400094032288
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,64,0.004614400118589402
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,64,0.012534399330615998
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,32,0.0044511999934911724
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,4,0.008166400343179702
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,32,0.009167999774217606
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,16,0.008607999980449676
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,8,0.004476799815893173
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,8,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,4,0.004454400017857551
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,16,0.004569600149989128
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,4,0.007446400076150894
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,2,0.004598399996757508
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,2,0.0066143997013568875
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,48,1,0.004499199986457825
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,48,1,0.006579200178384781
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,128,0.006460800021886826
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,128,0.018729600310325622
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,64,0.004473600164055824
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,64,0.012771199643611907
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,32,0.004383999854326248
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,16,0.008531200140714646
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,32,0.0066880002617835995
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,16,0.008691199868917466
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16,8,0.006752000004053116
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,2,0.008499199897050858
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,4,0.004460800066590309
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,4,0.00854720026254654
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,2,0.00498879998922348
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,2,0.006646399945020675
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,1,0.0042975999414920805
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,1,0.008563199639320373
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,128,0.006694400310516357
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,128,0.02088959962129593
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,64,0.004553600028157234
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,64,0.014595200121402741
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,32,0.004566400125622749
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,32,0.010684800148010255
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,16,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,16,0.008713600039482117
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,8,0.00493439994752407
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,8,0.00865280032157898
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,4,0.004428799822926521
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,4,0.00827839970588684
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,2,0.004464000090956688
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,2,0.007011199742555619
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,80,1,0.004464000090956688
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,80,1,0.006623999774456024
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,128,0.006521599739789963
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,32,1,0.006492800265550614
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,64,0.004575999826192856
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,32,0.0044319998472929
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,64,0.01485760062932968
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,32,0.011139199882745743
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,16,0.004518400132656098
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,16,0.008720000088214875
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,8,0.004508800059556961
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,4,0.004310400038957596
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,8,0.008675199747085572
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,4,0.008505599945783615
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,2,0.004374400153756142
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,96,1,0.0051807999610900875
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,2,0.008479999750852585
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,1,0.007103999704122543
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,128,0.006585600227117539
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,64,0.006473600119352341
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,128,0.0289247989654541
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,64,0.018719999492168425
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,32,0.010569600015878677
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,32,0.004467200115323066
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,16,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,32,0.01271360069513321
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,16,0.004364800080657006
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,16,0.010518400371074677
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,8,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,4,0.004416000097990036
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,4,0.008528000116348267
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,64,8,0.004454400017857551
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,2,0.0044351998716592785
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,128,1,0.0044895999133586885
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,2,0.007417599856853485
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,1,0.008591999858617782
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,128,0.03307200074195862
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,64,0.006355199962854385
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,64,0.020975999534130096
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,32,0.004662400111556053
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,32,0.014748799800872802
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,16,0.00456320010125637
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,16,0.01061440035700798
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,8,0.004399999976158142
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,8,0.008630400151014328
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,4,0.004425600171089172
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,64,8,0.008566399663686752
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,2,0.0047391999512910845
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,2,0.008537600189447403
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,1,0.004470400139689445
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,1,0.007513599842786789
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,128,0.008643200248479843
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,64,0.006665600091218948
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,128,0.03925119936466217
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,64,0.023056000471115112
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,32,0.0046431999653577805
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,16,0.004636799916625023
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,32,0.016704000532627106
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,8,0.004527999833226204
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,16,0.012352000176906585
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,4,0.004422400146722794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,8,0.009091199934482574
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,2,0.004508800059556961
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,4,0.008588799834251403
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,192,1,0.004380799829959869
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,2,0.008476799726486206
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,192,1,0.0066431999206542965
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,128,0.008515200018882752
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,64,0.006483200192451477
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,128,0.05021759867668152
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,32,0.004614400118589402
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,64,0.02898240089416504
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,32,0.018838399648666383
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,16,0.004595199972391129
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,8,0.004521600157022476
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,8,0.010636799782514573
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,4,0.0045855998992919925
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,4,0.00856959968805313
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,2,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,2,0.008489599823951722
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,256,1,0.004470400139689445
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,1,0.008537600189447403
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,128,0.008739200234413148
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,128,0.060070401430130003
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,64,0.006672000139951706
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,64,0.03309440016746521
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8,8,0.008591999858617782
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,32,0.020883199572563172
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,128,8,0.008627200126647949
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,16,0.01462399959564209
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,160,128,0.006675200164318084
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,8,0.004464000090956688
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,4,0.00445760004222393
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,8,0.010566399991512298
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,4,0.008697599917650223
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,2,0.004569600149989128
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,2,0.008508799970149994
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,320,1,0.008473599702119828
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,128,0.010700800269842149
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,128,0.07400640249252319
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,64,0.006707199662923813
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,64,0.03916800022125244
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,32,0.0064640000462532045
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,32,0.023014399409294128
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,16,0.004527999833226204
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,16,0.014787200093269347
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,8,0.004447999969124794
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,8,0.010777600109577179
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,4,0.004371200129389763
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,4,0.010540799796581268
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,2,0.004595199972391129
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,256,16,0.012809599936008453
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,2,0.008656000345945358
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,384,1,0.004553600028157234
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,384,1,0.008508799970149994
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,96,128,0.022966399788856506
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,128,0.09673920273780823
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,64,0.008479999750852585
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,64,0.04951359927654266
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,32,0.006620799750089645
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,32,0.029078400135040282
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,16,0.004569600149989128
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,16,0.017632000148296356
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,8,0.004403200000524521
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,8,0.01263359934091568
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,4,0.004403200000524521
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,4,0.010704000294208527
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,2,0.0044511999934911724
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,2,0.0086496002972126
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,512,1,0.008755200356245042
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,128,0.027263998985290527
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,32,0.006534399837255478
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,128,0.14344320297241211
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,64,0.010598400235176086
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,32,0.006668800115585327
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,64,0.07196800112724304
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,32,0.03934400081634522
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,16,0.006428799778223038
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,8,0.004608000069856644
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,16,0.022867199778556824
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,8,0.015449599921703338
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,4,0.004518400132656098
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,2,0.004380799829959869
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,1,0.004220800101757049
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,2,0.00867839977145195
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,768,1,0.004444799944758415
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,160,4,0.008476799726486206
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,128,0.03708159923553467
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,64,0.012668800354003907
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,128,0.18865920305252076
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,64,0.09697920083999634
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,32,0.008499199897050858
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,32,0.04962559938430786
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,16,0.006543999910354615
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,16,0.0290367990732193
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,8,0.01870719939470291
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,4,0.004492799937725067
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,2,0.0044351998716592785
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,2,0.010636799782514573
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,1,0.004553600028157234
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,1,0.008697599917650223
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,128,0.051158398389816284
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,128,0.2797312021255493
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,64,0.02805120050907135
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,1,0.004460800066590309
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,64,0.1435968041419983
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,32,0.010547199845314026
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,32,0.0723360002040863
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,16,0.008393599838018417
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,320,16,0.004646399989724159
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,8,0.006428799778223038
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,4,0.011744000017642975
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,8,0.023014399409294128
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,4,0.004623999819159508
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,2,0.004550400003790855
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,4,0.01629440039396286
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,2,0.011088000237941742
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,768,1,0.00854720026254654
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,1,0.00868159979581833
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1536,1,0.004598399996757508
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,128,0.0680351972579956
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,64,0.037241598963737486
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,32,0.012518399953842163
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,128,0.36954240798950194
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,64,0.18856639862060548
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,16,0.008668799698352814
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,32,0.0967296004295349
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,16,0.04947839975357056
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,1024,8,0.004540799930691719
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,8,0.0064800001680850984
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,8,0.02898240089416504
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,4,0.004524800181388855
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,4,0.018764799833297728
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,2,0.004729599878191948
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,2048,1,0.004502400010824204
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,2,0.012726399302482604
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,2048,1,0.010476800054311753
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1024,4,0.012716799974441528
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,128,0.08939520120620728
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,64,0.049568000435829165
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,16,0.010627199709415436
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,32,0.1419935941696167
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,128,0.5641695976257324
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,8,0.03919999897480011
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,8,0.008537600189447403
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,16,0.07219840288162231
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,4,0.006444799900054932
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,512,128,0.015177600085735321
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,2,0.004579199850559235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,2,0.01491519957780838
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,1,0.004540799930691719
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,1,0.012643200159072877
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,128,0.11872960329055786
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,64,0.0676479995250702
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,32,0.031001600623130798
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,16,0.012627199292182922
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,64,0.3723936080932617
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,32,0.1877792000770569
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,8,0.00870719999074936
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,4,0.006646399945020675
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,8,0.04947839975357056
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,4,0.029017600417137145
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,2,0.018806399405002595
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,1,0.0045855998992919925
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,1,0.012627199292182922
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,1536,16,0.039401599764823915
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,128,0.18101119995117188
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,64,0.08672959804534912
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,64,0.5678815841674805
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,32,0.045535999536514285
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,128,1.247424030303955
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,16,0.024959999322891235
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,32,0.2789983987808228
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,8,0.01064319983124733
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,16,0.14174720048904418
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,8,0.07218559980392455
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,4,0.03923200070858002
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,2,0.0065151996910572055
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,2,0.02293439954519272
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,1,0.0046016000211238865
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,3072,32,0.02388480007648468
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,6144,1,0.01488959938287735
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,64,0.27891199588775634
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,128,0.23517119884490967
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,64,0.11399999856948853
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,3072,4,0.02282239943742752
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,32,0.059977602958679196
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,16,0.03118399977684021
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,32,0.3703552007675171
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,8,0.012608000636100769
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,128,1.6877887725830079
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,8,0.09654719829559326
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,128,0.7776639938354493
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,4096,16,0.0977183997631073
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,2,0.00650240033864975
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,2,0.028988799452781676
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,1,0.00533440001308918
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,1,0.018691200017929076
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,128,0.3438463926315308
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,64,0.17063039541244507
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,64,1.228867244720459
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,32,0.08216639757156372
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,128,2.6349536895751955
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,16,0.0456959992647171
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,6144,4,0.0066592000424861904
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,32,0.5627711772918701
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,16,0.2781440019607544
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,8,0.02486719936132431
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,8,0.14239039421081542
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,4,0.07208960056304932
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,2,0.006630399823188781
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,2,0.03913280069828033
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,1,0.006585600227117539
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,12288,1,0.02300799936056137
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,16,0.18787839412689208
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,128,0.45373120307922366
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,8192,4,0.008687999844551087
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,4,0.04948799908161163
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,64,0.22115840911865234
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,4096,2,0.004598399996757508
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,32,0.11459840536117553
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,16,0.06196799874305725
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,16,0.36928958892822267
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,8,0.03108159899711609
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,32,0.7840127944946289
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,4,0.01255040019750595
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,8,0.18853119611740113
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,2,0.008636800199747085
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,4,0.09662079811096191
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,64,1.7050752639770508
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,2,0.049472001194953916
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,16384,1,0.006563200056552887
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,1,0.02895680069923401
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,64,0.27860798835754397
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,32,0.14056960344314576
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,128,0.5663392066955566
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,16384,128,3.5030849456787108
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,16,0.07399359941482545
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,8,0.03779839873313904
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,16,0.46626877784729004
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,8,0.23330559730529785
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,4,0.018748800456523895
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,32,0.9770048141479493
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,2,0.00852160006761551
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,4,0.11910719871520996
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,64,2.1487071990966795
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,12288,4,0.008787199854850769
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,2,0.05976319909095764
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,1,0.03327359855175018
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,8192,64,0.77707839012146
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,fp8,20480,128,4.467475128173828
SGLang,0.5.9,NVIDIA B200,mla_gen_post,default,float16,20480,1,0.0064800001680850984
