framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1,0.01129066695769628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1,0.011050666371981302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1,0.010922666639089584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1,0.011183999478816986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1,0.011567999919255575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1,0.011941333611806234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1,0.01210133358836174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1,0.01156266654531161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1,0.010954666882753372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1,0.01091733326514562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1,0.010911999891201654
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1,0.011071999867757162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,3,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,3,0.012005332857370377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,3,0.013007999708255133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,3,0.011850666254758835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,3,0.012293333808581034
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,3,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,3,0.01190399999419848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,3,0.011541333049535751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,3,0.01240533341964086
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,3,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,3,0.011109333485364914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,3,0.010874666273593903
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,3,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,3,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,3,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,3,0.010832000523805618
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,7,0.013173333058754602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,7,0.012154666086037954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,7,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,7,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,7,0.012047999848922094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,7,0.011264000087976456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,7,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,7,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,7,0.011045332998037338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,7,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,7,0.010821333775917688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,7,0.010847999403874079
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,7,0.010901333143313726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,7,0.01098666712641716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,7,0.010938666760921478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,7,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,15,0.010928000013033548
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,15,0.01173866664369901
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,15,0.01126933346192042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,15,0.01128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,15,0.01098666712641716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,15,0.01128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,15,0.011418666690587997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,15,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,15,0.01259200026591619
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,15,0.01128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,15,0.011621333658695221
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,15,0.011589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,15,0.011066666493813196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,15,0.011018666128317514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,15,0.011999999483426413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,31,0.013408000270525614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,31,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,15,0.011594666788975397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,31,0.01227733368674914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,31,0.012666666259368261
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,31,0.011274666835864386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,31,0.011498666057984034
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,31,0.011589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,31,0.01138666644692421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,31,0.011695999652147293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,31,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,31,0.011877333124478659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,31,0.011231999844312668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,31,0.011589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,31,0.0116799995303154
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,31,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,63,0.011813333878914515
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,63,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,63,0.011349332829316458
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,63,0.011136000355084738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,31,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,63,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,63,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,63,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,63,0.011584000041087469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,63,0.011061333119869232
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,63,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,63,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,63,0.01180800050497055
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,63,0.010981333752473196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,63,0.011514666179815928
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,63,0.011381333072980246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,63,0.011829332758982977
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,127,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,127,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,127,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,127,0.010805333654085795
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,127,0.012367999802033106
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,127,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,127,0.011711999773979187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,127,0.013013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,127,0.01179733375708262
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,127,0.011861333002646765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,127,0.012863999853531519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,127,0.012410666793584824
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,127,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,127,0.011968000481526056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,127,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,127,0.010944000134865442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,255,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,255,0.013327999661366144
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,255,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,255,0.013503999759753546
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,255,0.011509332805871964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,255,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,255,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,255,0.011557333171367645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,255,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,255,0.01292266696691513
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,255,0.011109333485364914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,255,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,255,0.012335999558369318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,255,0.011391999820868174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,255,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,255,0.011215999722480774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,511,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,511,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,511,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,511,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,511,0.013264000415802002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,511,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,511,0.013338666409254074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,511,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,511,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,511,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,511,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,511,0.013690666606028875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,511,0.013237333546082178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,511,0.01351999988158544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,511,0.013056000073750814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,511,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1023,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1023,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1023,0.013434667140245438
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1023,0.01349866638580958
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1023,0.013904000322024027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1023,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1023,0.013461332768201828
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.015247999380032221
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1023,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.013621332744757334
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.01603200038274129
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1023,0.013631999492645264
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,2047,0.017184000462293625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,2047,0.01543466622630755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,2047,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,2047,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,2047,0.013354666531085968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,2047,0.01314666618903478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,2047,0.013584000368913015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,2047,0.013450667262077332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.013669333110253016
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.013349333157142004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,2047,0.013482666263977686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,4095,0.02057066683967908
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,4095,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,4095,0.017770666629076004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,4095,0.015589332828919092
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,4095,0.015429332852363586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,4095,0.015114666273196539
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,4095,0.015669333438078564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,4095,0.01562133307258288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.017984000345071156
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.017162666966517765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.01700266698996226
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.015360000232855478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,4095,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,8191,0.026352000733216602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,8191,0.02163200080394745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,8191,0.019850666324297588
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,8191,0.019509332875410717
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,8191,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,8191,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,8191,0.01860800012946129
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,8191,0.019440000255902607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.019146667172511418
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.017658667018016178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.01773333301146825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.0182239996890227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.018250666558742523
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,8191,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03804266701141993
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,16383,0.03012799968322118
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,16383,0.02514133354028066
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,16383,0.0234400009115537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,16383,0.023525332411130268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,16383,0.02333866556485494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,16383,0.023413332800070446
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,16383,0.023685333629449207
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.027610667049884796
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.021520001192887623
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.01952533299724261
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,16383,0.02143466720978419
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,32767,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,32767,0.03770666569471359
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,32767,0.030159999926884968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,32767,0.028336000939210255
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,32767,0.028384000062942505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,32767,0.02794666588306427
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,32767,0.027424000203609467
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,32767,0.029445332785447437
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.033743999898433685
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.02762666592995326
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.025573333104451496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.025920001169045765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.025706666211287182
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.02624533325433731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.025226667523384094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,65535,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,32767,0.025242666403452556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,65535,0.042037333051363625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,65535,0.04343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,65535,0.03559466699759165
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,65535,0.037045332292715706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,65535,0.035973332822322845
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,65535,0.037087999284267426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,65535,0.037952000896135964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.03555733213822047
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.033717334270477295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.029311999678611755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.029653333127498627
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.02881066749493281
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.027786667148272198
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,65535,0.030016000072161358
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,131071,0.057914664347966514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,131071,0.06266666452089946
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,131071,0.0710506687561671
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,131071,0.05522666871547699
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,131071,0.056330665946006775
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,131071,0.0544106662273407
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,131071,0.055311997731526695
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,131071,0.05699733396371206
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.04230933388074239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.04174399872620901
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.04193066557248434
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.03403733422358831
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.035504000882307686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.03380800038576126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.03345600018898646
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,131071,0.03576533248027166
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1,0.012085333466529846
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1,0.01145600030819575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1,0.01211200033624967
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1,0.011306667079528173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1,0.01145600030819575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1,0.011215999722480774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1,0.011215999722480774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1,0.012714666624863943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1,0.011237333218256632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,3,0.04718933502833048
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,3,0.011338666081428528
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,3,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,3,0.012266666938861212
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,3,0.011429333438475927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,3,0.011039999624093374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,3,0.011274666835864386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,3,0.012714666624863943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,3,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,3,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,3,0.011727999895811081
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,3,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,3,0.011215999722480774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,3,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,3,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,3,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,7,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,7,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,7,0.012885333349307379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,7,0.012159999459981918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,7,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,7,0.01202133297920227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,7,0.011418666690587997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,7,0.01116266722480456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,7,0.013258667041858038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,7,0.011637333780527115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,7,0.011168000598748526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,7,0.01097600037852923
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,7,0.011994666109482447
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,7,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,7,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,7,0.011322667201360067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,15,0.012053333222866058
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,15,0.012746666868527731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,15,0.012554666648308435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,15,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,15,0.011802667131026586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,15,0.011215999722480774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,15,0.01139733319481214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,15,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,15,0.01232533281048139
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,15,0.011381333072980246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,15,0.011178666104873022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,15,0.013978666315476099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,15,0.011445333560307821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,15,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,15,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,15,0.011941333611806234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,31,0.029205332199732464
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,31,0.01129066695769628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,31,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,31,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,31,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,31,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,31,0.011546666423479715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,31,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,31,0.012784000486135483
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,31,0.011621333658695221
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,31,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,31,0.012655999511480331
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,31,0.011461333682139715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,31,0.010922666639089584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,31,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,31,0.01156266654531161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,63,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,63,0.011168000598748526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,63,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,63,0.012223999947309494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,63,0.011648000528415045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,63,0.013546666751305262
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,63,0.011706666400035223
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,63,0.011226666470368704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,63,0.01116266722480456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,63,0.01090666651725769
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,63,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,63,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,63,0.011477333803971609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,63,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,63,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,63,0.011050666371981302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,127,0.01118933285276095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,127,0.012549333274364471
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,127,0.013301332791646322
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,127,0.01184533288081487
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,127,0.011663999408483505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,127,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,127,0.01251199965675672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,127,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,127,0.011168000598748526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,127,0.011514666179815928
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,127,0.011152000476916632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,127,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,127,0.011168000598748526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,127,0.011338666081428528
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,127,0.011482667177915573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,127,0.012650666137536367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,255,0.012037333101034164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,255,0.010954666882753372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,255,0.012752000242471695
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,255,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,255,0.014053333550691605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,255,0.011002667248249054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,255,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,255,0.013050666699806849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,255,0.012986666212479273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,255,0.012453333785136541
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,255,0.011178666104873022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,255,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,255,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,255,0.011765333513418833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,255,0.01192533348997434
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,255,0.01251199965675672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,511,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,511,0.013717333475748697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,511,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,511,0.013616000612576803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,511,0.013269333789745966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,511,0.011855999628702799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,511,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,511,0.013440000514189402
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,511,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,511,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,511,0.013056000073750814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,511,0.012730666746695837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,511,0.012944000462690989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,511,0.013056000073750814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,511,0.012970666090647379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,511,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1023,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1023,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1023,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1023,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1023,0.012869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1023,0.012330666184425354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1023,0.011541333049535751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1023,0.013418667018413544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.013173333058754602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.01139733319481214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.011653333902359009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1023,0.01250133290886879
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,2047,0.017621333400408428
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,2047,0.015754666179418564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,2047,0.01738133281469345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,2047,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,2047,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,2047,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,2047,0.012853333105643591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,2047,0.013056000073750814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.01328533391157786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.012618667135636011
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,2047,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,4095,0.022426667312781017
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,4095,0.019786667078733444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,4095,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,4095,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,4095,0.015770666301250458
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,4095,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,4095,0.01676799977819125
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,4095,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.0195573332409064
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.016949333250522614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.01782400036851565
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.015552000453074774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,4095,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,8191,0.0314026673634847
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,8191,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,8191,0.021557333568731945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,8191,0.01897066707412402
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,8191,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,8191,0.017802666872739792
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,8191,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,8191,0.017711999515692394
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.021722666919231415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.01958400011062622
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.020175999651352566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.019354666272799175
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.017637333522240322
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.018357332795858383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,8191,0.01759999990463257
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,16383,0.02754666656255722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,16383,0.03398400048414866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,16383,0.027242665489514668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,16383,0.02665599932273229
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,16383,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,16383,0.025519999365011852
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,16383,0.023962666591008503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,16383,0.02422933280467987
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.027722666660944622
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.025536000728607178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.022954667607943218
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.021301334102948506
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.023472001155217487
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.02178666740655899
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,16383,0.021525333325068157
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,32767,0.034048000971476235
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,32767,0.0382080003619194
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,32767,0.03788800040880839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,32767,0.029824001093705494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,32767,0.03146666785081228
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,32767,0.031583999594052635
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,32767,0.03038399914900462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,32767,0.03186133255561193
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.029743999242782593
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.033973333736260734
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.026752000053723652
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.027818667391935985
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.027653334041436512
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,32767,0.027269333600997925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.025983999172846477
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,65535,0.05100800096988678
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,65535,0.06057066718737284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,65535,0.057061334451039634
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,65535,0.054746667544047035
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,65535,0.05492266515890757
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,65535,0.05708266794681549
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,65535,0.05593599875768026
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,65535,0.05551466842492422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.0378560001651446
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.03677333394686381
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.03483733286460241
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.031119999786218006
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.031914666295051575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.03190399954716364
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,65535,0.031914666295051575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,131071,0.07568533221880595
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,131071,0.08361066381136577
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,131071,0.09271466732025146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,131071,0.07790400087833405
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,131071,0.07517333328723907
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,131071,0.07993066807587941
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,131071,0.07900799810886383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,131071,0.07779199878374736
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.05236266553401947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.056176001826922096
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.05300800005594889
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.0460746685663859
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.04663999875386556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.04985600213209788
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,131071,0.047450666626294456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1,0.013493333011865616
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1,0.013280000537633896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1,0.011450666934251785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1,0.012058666596810022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1,0.012400000045696894
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1,0.013082666943470636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1,0.011205332974592844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1,0.012869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1,0.011306667079528173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1,0.01232533281048139
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,3,0.013162666310866674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,3,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,3,0.011711999773979187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,3,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,3,0.011066666493813196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,3,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,3,0.01110400011142095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,3,0.012432000289360682
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,3,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,3,0.011557333171367645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,3,0.011338666081428528
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,3,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,3,0.011445333560307821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,3,0.011424000064531961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,3,0.01156266654531161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,3,0.011461333682139715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,7,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,7,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,7,0.01357866699496905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,7,0.011839999506870905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,7,0.011365332951148352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,7,0.012047999848922094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,7,0.023717333873112995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,7,0.012800000607967377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,7,0.013525333255529404
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,7,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,7,0.01101333275437355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,7,0.012736000120639801
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,7,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,7,0.012042666474978128
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,7,0.010794666906197866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,7,0.011706666400035223
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,15,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,15,0.013642666240533194
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,15,0.011493333925803503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,15,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,15,0.011770666887362799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,15,0.011541333049535751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,15,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,15,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,15,0.01370666672786077
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,15,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,15,0.01118933285276095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,15,0.011727999895811081
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,15,0.011087999989589056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,15,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,15,0.01146666705608368
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,15,0.010768000036478043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,31,0.01293333371480306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,31,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,31,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,31,0.01129066695769628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,31,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,31,0.011450666934251785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,31,0.011663999408483505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,31,0.011535999675591787
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,31,0.014074667046467463
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,31,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,31,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,31,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,31,0.01139733319481214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,31,0.012346666306257248
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,31,0.01138666644692421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,31,0.011519999553759893
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,63,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,63,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,63,0.01156266654531161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,63,0.011125333607196808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,63,0.010938666760921478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,63,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,63,0.011482667177915573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,63,0.01301866645614306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,63,0.012944000462690989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,63,0.012975999464591345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,63,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,63,0.011509332805871964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,63,0.011674666156371435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,63,0.01099733387430509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,63,0.011610666910807291
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,63,0.011130666981140772
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,127,0.013338666409254074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,127,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,127,0.01239466667175293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,127,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,127,0.011365332951148352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,127,0.011114666859308878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,127,0.011445333560307821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,127,0.012042666474978128
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,127,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,127,0.012661332885424295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,127,0.011029332876205444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,127,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,127,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,127,0.01110400011142095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,127,0.011349332829316458
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,127,0.011770666887362799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,255,0.013066666821638743
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,255,0.013013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,255,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,255,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,255,0.011770666887362799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,255,0.012618667135636011
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,255,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,255,0.011274666835864386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,255,0.012671999633312225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,255,0.012837332983811697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,255,0.010885333021481832
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,255,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,255,0.01166933278242747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,255,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,255,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,255,0.011424000064531961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,511,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,511,0.013701333353916803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,511,0.01370666672786077
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,511,0.013338666409254074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,511,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,511,0.011802667131026586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,511,0.013034666577974955
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,511,0.011322667201360067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,511,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,511,0.013674666484196981
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,511,0.013082666943470636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,511,0.01314666618903478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,511,0.012373333175977072
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,511,0.013050666699806849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,511,0.011685332904259363
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,511,0.01328533391157786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1023,0.017173333714405697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1023,0.016506666938463848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1023,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1023,0.013466666142145792
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1023,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1023,0.013376000026861826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1023,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1023,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.013461332768201828
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.013151999562978745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.013167999684810638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.012261333564917246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.01145600030819575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.013280000537633896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1023,0.011930666863918304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,2047,0.02170666555563609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,2047,0.0176959993938605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,2047,0.017349333812793095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,2047,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,2047,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,2047,0.015829333414634068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,2047,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,2047,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.01540800059835116
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1,0.011152000476916632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1,0.011109333485364914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1,0.011792000383138657
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1,0.012901333471139273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1,0.012741333494583765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1,0.012351999680201212
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1,0.012837332983811697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1,0.013669333110253016
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1,0.012906666845083237
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1,0.01458666721979777
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1,0.011071999867757162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1,0.011018666128317514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,3,0.01179733375708262
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1,0.01110400011142095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1,0.010954666882753372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,3,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,3,0.011727999895811081
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,3,0.011306667079528173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,3,0.011264000087976456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,3,0.012863999853531519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,3,0.011109333485364914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,3,0.012831999609867731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,3,0.01146666705608368
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,3,0.01268799975514412
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,3,0.011519999553759893
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,3,0.011509332805871964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,3,0.011237333218256632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,3,0.011317333827416102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,3,0.01110400011142095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,7,0.01173866664369901
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,7,0.011765333513418833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,7,0.010879999647537867
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,3,0.010858666151762009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,7,0.010853332777818045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,7,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,7,0.011007999380429586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,7,0.011999999483426413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,7,0.012069333344697952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,7,0.011045332998037338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,7,0.012400000045696894
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,7,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,7,0.01098666712641716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,7,0.011440000186363855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,7,0.011685332904259363
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,15,0.01293333371480306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,7,0.011594666788975397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,15,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,15,0.013162666310866674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,7,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,15,0.012842666357755661
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,15,0.01126933346192042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,15,0.012341332932313284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,15,0.01163200040658315
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,15,0.011589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,15,0.012517333030700684
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,15,0.011343999455372492
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,15,0.011503999431928
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,15,0.011365332951148352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,15,0.01081066702802976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,15,0.01180800050497055
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,31,0.010981333752473196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,31,0.012944000462690989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,31,0.01116266722480456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,31,0.012703999876976013
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,15,0.010992000500361124
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,31,0.010778666784365972
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,31,0.012960000584522883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,31,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,31,0.01108266661564509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,15,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,31,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,31,0.011381333072980246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,31,0.011871999750534693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,31,0.011450666934251785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,31,0.011050666371981302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,31,0.01163200040658315
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,31,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,63,0.01259200026591619
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,63,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,63,0.013162666310866674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,63,0.013487999637921652
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,63,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,63,0.013295999417702356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,63,0.012138667205969492
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,63,0.01091733326514562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,63,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,63,0.01211200033624967
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,63,0.011007999380429586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,63,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,63,0.011418666690587997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,63,0.01157333329319954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,63,0.011194666226704916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,127,0.011546666423479715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,127,0.0124746672809124
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,127,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,127,0.01184533288081487
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,127,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,127,0.011519999553759893
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,127,0.011765333513418833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,31,0.011039999624093374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,127,0.01181866725285848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,127,0.011120000233252844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,127,0.011429333438475927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,127,0.011194666226704916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,127,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,127,0.011125333607196808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,127,0.011194666226704916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,127,0.010970667004585266
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,127,0.011221333096424738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,63,0.011039999624093374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,255,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,255,0.01089599976936976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,255,0.012986666212479273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,255,0.01090666651725769
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,255,0.01240533341964086
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,255,0.01101333275437355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,255,0.010794666906197866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,255,0.012458667159080505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,255,0.010885333021481832
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,255,0.012896000097195307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,255,0.01099733387430509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,255,0.01138666644692421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,255,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,255,0.011130666981140772
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,255,0.01126933346192042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,511,0.013376000026861826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,511,0.013493333011865616
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,511,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,511,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,511,0.012896000097195307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,511,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,511,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,511,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,255,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,511,0.01332266628742218
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,511,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,511,0.014357333381970724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,511,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,511,0.013056000073750814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,511,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,511,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,1023,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,1023,0.014789332946141561
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,1023,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,511,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,1023,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,1023,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,1023,0.014165333161751429
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,1023,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,1023,0.015130666395028433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,1023,0.01553600033124288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,1023,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,1023,0.015087999403476715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,1023,0.013221333424250284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,1023,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,1023,0.012986666212479273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,1023,0.013199999928474426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,2047,0.017781333376963932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,2047,0.015546667079130808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,2047,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,1023,0.013471999516089758
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,2047,0.013744000345468521
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,2047,0.013386666774749756
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,2047,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,2047,0.01569066693385442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,2047,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,2047,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,2047,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,2047,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,2047,0.01370666672786077
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,2047,0.01358933374285698
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,2047,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,2047,0.013354666531085968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,4095,0.019621333728233974
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,2047,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,4095,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,4095,0.017397332936525345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,4095,0.015205333630243937
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,4095,0.019445333629846573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,4095,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,4095,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,4095,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,4095,0.016623999923467636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,4095,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,4095,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,4095,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,4095,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,4095,0.014815999815861383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,4095,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,8191,0.026159999271233875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,8191,0.02176533391078313
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,8191,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,8191,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,4095,0.015402667224407196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,8191,0.01842133328318596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,8191,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,8191,0.018837332725524902
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,8191,0.019333332777023315
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,8191,0.021141332884629566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,8191,0.018895999838908512
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,8191,0.019029332945744198
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,8191,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,8191,0.016885332763195038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,8191,0.01741333305835724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,8191,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,16383,0.03873066604137421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,16383,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,16383,0.025498665869235992
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,16383,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,16383,0.023183998962243397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,16383,0.02314666658639908
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,16383,0.023045333723227184
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,16383,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,16383,0.025392000873883564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,16383,0.02277333289384842
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,8191,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,16383,0.021104000508785248
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,16383,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,16383,0.020693333198626835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,16383,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,16383,0.02088533341884613
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,32767,0.03363200028737386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,32767,0.03758399933576584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,32767,0.030213333666324615
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,32767,0.028805332879225414
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,32767,0.02735999971628189
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,32767,0.02789866675933202
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,32767,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,32767,0.028218666712443035
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,32767,0.033226666351159416
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,32767,0.027215999861558277
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,32767,0.025594666600227356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,32767,0.025546667476495106
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,32767,0.025749333202838898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,32767,0.025050667424996693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,32767,0.026176000634829204
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,65535,0.038176000118255615
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,32767,0.025648000339667004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,65535,0.04233066737651825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,65535,0.04304533203442892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,65535,0.03772266705830892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,65535,0.0359199990828832
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,65535,0.03404266635576884
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,65535,0.036362667878468834
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,16383,0.019039999693632126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,65535,0.035562666753927864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,65535,0.029802667597929638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,65535,0.030048000315825146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,65535,0.028181334336598713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,65535,0.03562666724125544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,65535,0.029909332593282063
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,65535,0.02991466720898946
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,65535,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1,1,1,131071,0.05775466561317444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1,1,2,131071,0.06279466549555461
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1,1,4,131071,0.07089599967002869
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1,1,16,131071,0.05385066568851471
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1,1,32,131071,0.05644799768924713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1,1,64,131071,0.05610666672388712
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1,1,128,131071,0.055861334005991616
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,131071,0.04181866844495138
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1,1,8,131071,0.05448000133037567
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1,1,4,131071,0.0429066667954127
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1,1,1,65535,0.036357333262761436
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1,1,16,131071,0.033770665526390076
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1,1,32,131071,0.03384000062942505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1,1,64,131071,0.03597866743803024
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1,1,2,131071,0.04167999823888143
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1,0.012970666090647379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1,0.012949333836634954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1,0.012730666746695837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1,0.012757333616415659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1,0.011637333780527115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1,0.011424000064531961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1,0.011333333949247995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1,1,8,131071,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1,0.01228800043463707
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1,0.01121066634853681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1,0.012831999609867731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1,0.012069333344697952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1,0.011472000430027643
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1,0.01166933278242747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1,0.011114666859308878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,3,0.012949333836634954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,3,0.012874666601419449
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,3,0.012469333906968435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,3,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,3,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,3,0.011317333827416102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1,0.010911999891201654
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,3,0.01102399950226148
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,3,0.011029332876205444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,3,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,3,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,3,0.011018666128317514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1,1,128,131071,0.03384000062942505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,3,0.010901333143313726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,3,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,3,0.011136000355084738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,7,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,3,0.011306667079528173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,7,0.01314666618903478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,7,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,7,0.01320533330241839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,7,0.011087999989589056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,7,0.012789333860079447
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,7,0.011637333780527115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,7,0.012624000509579977
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,7,0.012416000167528788
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,7,0.011226666470368704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,7,0.011701333026091257
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,7,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,7,0.011584000041087469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,3,0.011519999553759893
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,7,0.011472000430027643
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,7,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,15,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,15,0.011488000551859537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,15,0.012223999947309494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,15,0.012250666817029318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,15,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,15,0.011722666521867117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,15,0.011205332974592844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,15,0.013418667018413544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,15,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,15,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,15,0.012282667060693106
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,15,0.011952000359694162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,15,0.011312000453472137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,7,0.011018666128317514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,15,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,31,0.011231999844312668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,15,0.01116266722480456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,31,0.011007999380429586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,31,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,31,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,15,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,31,0.011637333780527115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,31,0.011685332904259363
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,31,0.012965332716703415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,31,0.011343999455372492
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,31,0.011695999652147293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,31,0.013429333766301474
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,31,0.01139733319481214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,31,0.011525332927703857
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,31,0.0120319997270902
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,31,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,31,0.012053333222866058
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,63,0.01128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,63,0.011450666934251785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,63,0.012026666353146235
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,63,0.01181866725285848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,63,0.011594666788975397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,63,0.011178666104873022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,63,0.013258667041858038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,63,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,63,0.011584000041087469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,63,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,63,0.011429333438475927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,63,0.01145600030819575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,63,0.012256000190973282
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,63,0.011482667177915573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,63,0.011440000186363855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,127,0.013349333157142004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,127,0.012096000214417776
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,127,0.012549333274364471
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,63,0.011114666859308878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,127,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,127,0.012874666601419449
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,127,0.011498666057984034
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,127,0.01246400053302447
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,127,0.012400000045696894
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,127,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,127,0.014032000054915747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,127,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,127,0.011221333096424738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,31,0.011226666470368704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,127,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,127,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,255,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,255,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,255,0.011530666301647821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,255,0.013295999417702356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,255,0.011424000064531961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,255,0.012175999581813812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,127,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,255,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,255,0.011514666179815928
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,255,0.01180800050497055
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,255,0.011317333827416102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,255,0.012053333222866058
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,127,0.01138666644692421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,255,0.011760000139474869
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,255,0.011717333147923151
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,255,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,255,0.011029332876205444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,255,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,511,0.012778667112191519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,511,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,511,0.013173333058754602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,511,0.013525333255529404
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,511,0.012805332740147909
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,511,0.013424000392357508
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,511,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,511,0.01313599944114685
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,511,0.01351999988158544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,511,0.013248000293970108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,511,0.013424000392357508
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,511,0.013242666920026144
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,511,0.012885333349307379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,511,0.01156266654531161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,511,0.013269333789745966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,1023,0.015103999525308609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,1023,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,1023,0.013445333888133367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,1023,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,1023,0.011002667248249054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,1023,0.011893333246310553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,1023,0.012719999998807907
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,1023,0.011087999989589056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,1023,0.014677333335081736
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,1023,0.013354666531085968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,1023,0.012639999389648438
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,1023,0.019343999524911244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,511,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,1023,0.012698666503032049
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,1023,0.013194666554530462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,1023,0.01099733387430509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,2047,0.01720533271630605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,2047,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,2047,0.01339200014869372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,2047,0.013007999708255133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,1023,0.01231466606259346
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,2047,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,2047,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,2047,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,2047,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,2047,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,2047,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,2047,0.013653332988421122
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,2047,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,2047,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,2047,0.011610666910807291
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,2047,0.017525333911180496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,2047,0.013162666310866674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,4095,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,4095,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,4095,0.017509333789348602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,4095,0.021829334398110706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,4095,0.017125333348910015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,4095,0.015317333241303762
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,4095,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,4095,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,4095,0.016058667252461117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,4095,0.01533866673707962
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,4095,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,4095,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,4095,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,4095,0.015642666568358738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,8191,0.02978666623433431
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,8191,0.02420799930890401
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,4095,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,8191,0.02187199890613556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,8191,0.019082666685183842
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,8191,0.01907733331123988
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,8191,0.019088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,8191,0.018357332795858383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,8191,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,8191,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,8191,0.019386666516462963
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,8191,0.019130667050679524
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,8191,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,8191,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,8191,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,4095,0.017312000195185345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,16383,0.029317334294319153
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,16383,0.033733333150545754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,8191,0.01956266661485036
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,16383,0.027450665831565857
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,16383,0.025568000972270966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,16383,0.025386666258176167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,16383,0.025637333591779072
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,8191,0.01724799970785777
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,16383,0.02569066733121872
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,16383,0.028005334238211315
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,16383,0.022458667556444805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,16383,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,16383,0.02139200021823247
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,16383,0.022853332261244457
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,16383,0.02145066608985265
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,16383,0.021935999393463135
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,16383,0.024959998826185863
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,32767,0.03201599915822347
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,32767,0.04131199916203817
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,32767,0.03753600021203359
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,32767,0.032186667124430336
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,32767,0.03107200066248576
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,32767,0.03222399950027466
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,32767,0.03178666780392329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,32767,0.031680000325044
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,32767,0.029658667743206024
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,32767,0.033743999898433685
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,32767,0.027141332626342773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,32767,0.02699733277161916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,32767,0.025829332570234936
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,32767,0.02717866748571396
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,16383,0.023317334552605946
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,32767,0.02550400048494339
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,32767,0.027786667148272198
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,65535,0.05111999809741974
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,65535,0.059338668982187905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,65535,0.05903466542561849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,65535,0.05276800195376078
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,65535,0.05508799850940704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,65535,0.05568000177542368
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,65535,0.05624000231424967
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,65535,0.03557866563399633
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,65535,0.03764266769091288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,65535,0.033759998778502144
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,65535,0.05394133428732554
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,65535,0.03243733445803324
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,65535,0.03123733401298523
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,65535,0.032101333141326904
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,65535,0.03164266546567281
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,2,1,1,131071,0.07522133489449818
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,65535,0.03129599988460541
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,2,1,4,131071,0.09259733557701111
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,2,1,8,131071,0.07729066908359528
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,2,1,16,131071,0.0772266685962677
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,2,1,64,131071,0.0773173322280248
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,2,1,2,131071,0.08475733796755473
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,2,1,1,131071,0.052239999175071716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,2,1,2,131071,0.05699733396371206
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,2,1,4,131071,0.05282133320967356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,2,1,8,131071,0.04810666541258494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,2,1,32,131071,0.0786240001519521
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,2,1,16,131071,0.04634666442871094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,2,1,32,131071,0.047498668233553566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,2,1,64,131071,0.04994666576385498
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,2,1,128,131071,0.04861866434415182
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1,0.011584000041087469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,2,1,128,131071,0.08067200084527333
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1,0.011770666887362799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1,0.012741333494583765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1,0.011429333438475927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1,0.011589333415031433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1,0.011690666278203329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1,0.011770666887362799
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,3,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,3,0.01357866699496905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,3,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1,0.011365332951148352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,3,0.011674666156371435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,3,0.011871999750534693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,3,0.01198400060335795
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,3,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,3,0.013386666774749756
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,3,0.013989333063364029
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,3,0.011231999844312668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,3,0.011616000284751257
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,3,0.010837333897749582
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,3,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,3,0.011050666371981302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,3,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,7,0.012879999975363413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,7,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,7,0.010890666395425797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,7,0.013007999708255133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,7,0.01108266661564509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,7,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,7,0.010954666882753372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1,0.01145600030819575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,7,0.01301866645614306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,7,0.012896000097195307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,7,0.01090666651725769
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,7,0.011626667032639185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,7,0.011488000551859537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,7,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,3,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,7,0.011381333072980246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,15,0.013525333255529404
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,15,0.01210133358836174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,15,0.013013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,15,0.012170666207869848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,7,0.011039999624093374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,15,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,15,0.011834666132926941
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,15,0.011600000162919363
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,15,0.011114666859308878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,15,0.011152000476916632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,15,0.011183999478816986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,15,0.01116266722480456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,15,0.013242666920026144
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,15,0.011136000355084738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,7,0.010922666639089584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,15,0.010816000401973724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,15,0.011317333827416102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,15,0.010960000256697336
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,31,0.012965332716703415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,31,0.011391999820868174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,31,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,31,0.011610666910807291
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,31,0.011136000355084738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,31,0.011045332998037338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,31,0.011312000453472137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,31,0.012485332787036896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,31,0.012373333175977072
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,31,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,31,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,31,0.011450666934251785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,31,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,31,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,31,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,63,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,63,0.011391999820868174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,63,0.012554666648308435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,63,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,31,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,63,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,63,0.011434666812419891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,63,0.011221333096424738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,63,0.010853332777818045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,63,0.011519999553759893
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,63,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,63,0.011194666226704916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,63,0.012144000579913458
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,63,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,63,0.012432000289360682
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,127,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,127,0.013424000392357508
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,127,0.013050666699806849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,127,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,63,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,127,0.012618667135636011
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,127,0.011333333949247995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,127,0.01331199953953425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,127,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,127,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,127,0.01139733319481214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,127,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,127,0.011264000087976456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,127,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,63,0.01101333275437355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,127,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,127,0.01098666712641716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,255,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,255,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,255,0.013434667140245438
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,255,0.011461333682139715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,127,0.011440000186363855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,255,0.01126933346192042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,255,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,255,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,255,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,255,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,255,0.012117333710193634
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,255,0.011322667201360067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,255,0.011530666301647821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,255,0.011301333705584208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,255,0.01101333275437355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,511,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,511,0.012986666212479273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,255,0.011973333855470022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,511,0.013194666554530462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,511,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,511,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,511,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,511,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,255,0.010885333021481832
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,511,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,511,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,511,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,511,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,511,0.012853333105643591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,511,0.01166933278242747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,511,0.012757333616415659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,511,0.011178666104873022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,1023,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,511,0.013269333789745966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,1023,0.013050666699806849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,1023,0.013280000537633896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,1023,0.011247999966144562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,1023,0.013525333255529404
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,1023,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,1023,0.015791999797026317
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,1023,0.013839999834696451
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,1023,0.013850666582584381
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,1023,0.012901333471139273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,1023,0.011834666132926941
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,1023,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,1023,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,1023,0.011136000355084738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,2047,0.021546666820844013
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,2047,0.017093333105246227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,1023,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,2047,0.01586666703224182
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,2047,0.015285332997639975
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,2047,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,2047,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,2047,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,2047,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,2047,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,2047,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,2047,0.015253332753976187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,2047,0.01534933348496755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,2047,0.01488000030318896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,2047,0.013199999928474426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,1023,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,2047,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,4095,0.02888533224662145
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,4095,0.01956266661485036
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,4095,0.017152000218629837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,4095,0.017407999684413273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,2047,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,4095,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,4095,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,4095,0.023605334262053173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,4095,0.01918399954835574
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,4095,0.01782400036851565
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,4095,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,4095,0.016986666868130367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,4095,0.015504000087579092
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,4095,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,4095,0.015754666179418564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,4095,0.014896000425020853
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,8191,0.02569599946339925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,8191,0.02958400050799052
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,8191,0.024117333193620045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,8191,0.021456000705560047
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,8191,0.02086399992307027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,8191,0.02107733239730199
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,8191,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,4095,0.021407999098300934
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,8191,0.024346667031447094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,8191,0.021221332252025604
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,8191,0.01958400011062622
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,8191,0.01932266727089882
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,8191,0.019365333020687103
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,8191,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,8191,0.019317333896954853
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,8191,0.01781333362062772
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,16383,0.02957333376010259
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,16383,0.035445332527160645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,16383,0.029418667157491047
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,16383,0.027119999130566914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,16383,0.027445333699385326
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,16383,0.025722667574882507
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,16383,0.02733866622050603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,16383,0.0260959987839063
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,16383,0.027552001178264618
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,16383,0.023914667467276256
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,16383,0.021674667795499165
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,16383,0.022106667359670002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,16383,0.02128533273935318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,16383,0.02165866643190384
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,16383,0.021242665747801464
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,32767,0.0489333321650823
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,32767,0.05402666827042898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,32767,0.05592533449331919
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,8191,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,32767,0.04961599906285604
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,16383,0.03192000091075897
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,32767,0.04937066634496053
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,32767,0.05048533280690511
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,32767,0.050069332122802734
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,32767,0.050069332122802734
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,32767,0.032816000282764435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,32767,0.03179199993610382
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,32767,0.03161599983771642
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,32767,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,32767,0.025413334369659424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,32767,0.027098665634791057
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,32767,0.02587733417749405
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,65535,0.07126399874687195
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,65535,0.07984533409277599
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,65535,0.09097066521644592
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,65535,0.0728000005086263
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,65535,0.07450133562088013
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,65535,0.07403199871381123
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,32767,0.027258666853109997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,65535,0.07259733478228252
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,65535,0.07457600037256877
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,65535,0.05023466547330221
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,65535,0.05249600112438202
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,65535,0.05053866902987162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,65535,0.04343999922275543
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,65535,0.04324266811211904
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,65535,0.04408533374468485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,65535,0.043466667334238686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,4,1,1,131071,0.11961600184440613
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,4,1,2,131071,0.13129599889119467
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,4,1,4,131071,0.1585653324921926
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,4,1,8,131071,0.12318933010101318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,65535,0.04394666850566864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,4,1,32,131071,0.12110933661460876
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,4,1,64,131071,0.12168000141779582
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,4,1,128,131071,0.12126400073369344
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,4,1,1,131071,0.07241066793600719
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,4,1,16,131071,0.12197867035865784
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,4,1,2,131071,0.07268799841403961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,4,1,4,131071,0.08149866759777069
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,4,1,8,131071,0.06485866506894429
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,4,1,32,131071,0.06507200002670288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,4,1,64,131071,0.06481066842873891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,4,1,128,131071,0.06505066653092702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1,0.01381333296497663
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1,0.013295999417702356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1,0.012266666938861212
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1,0.01179733375708262
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,1,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1,0.013765333841244379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1,0.011274666835864386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1,0.011296000331640244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1,0.011498666057984034
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1,0.011002667248249054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1,0.01101333275437355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,1,0.011402666568756104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,3,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,3,0.011839999506870905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,3,0.012437333663304647
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,3,0.012202666451533636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,3,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,3,0.012069333344697952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,3,0.011418666690587997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,3,0.013584000368913015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,3,0.013162666310866674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,3,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,3,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,3,0.011322667201360067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,3,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,3,0.012245333443085352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,3,0.011424000064531961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,3,0.011328000575304031
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,7,0.013584000368913015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,7,0.012826666235923767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,7,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,7,0.012960000584522883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,7,0.011445333560307821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,7,0.011930666863918304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,7,0.01137599969903628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,7,0.013552000125249227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,7,0.013093333691358566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,7,0.01313599944114685
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,4,1,16,131071,0.06411199768384297
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,7,0.010938666760921478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,7,0.011418666690587997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,7,0.01098666712641716
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,7,0.011328000575304031
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,7,0.011359999577204386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,15,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,15,0.012944000462690989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,7,0.013151999562978745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,15,0.01109333336353302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,15,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,15,0.011695999652147293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,15,0.012784000486135483
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,15,0.013376000026861826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,15,0.012879999975363413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,15,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,15,0.011445333560307821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,15,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,15,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,15,0.011264000087976456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,15,0.011312000453472137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,31,0.013343999783198038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,31,0.013354666531085968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,31,0.012389333297808966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,31,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,31,0.011039999624093374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,31,0.01118933285276095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,31,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,31,0.011333333949247995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,31,0.013093333691358566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,31,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,31,0.012944000462690989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,31,0.011205332974592844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,15,0.011221333096424738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,31,0.011029332876205444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,31,0.012757333616415659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,15,0.013093333691358566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,31,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,63,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,63,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,63,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,63,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,63,0.011050666371981302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,63,0.012784000486135483
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,63,0.010858666151762009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,63,0.012773333738247553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,63,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,63,0.012821332861979803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,63,0.011173332730929056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,63,0.012602667013804117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,63,0.010954666882753372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,63,0.011152000476916632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,31,0.01102399950226148
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,63,0.010784000158309937
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,63,0.011002667248249054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,127,0.013999999811251959
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,127,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,127,0.012879999975363413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,127,0.012821332861979803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,127,0.012074666718641916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,127,0.011178666104873022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,127,0.012448000411192576
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,127,0.011962667107582092
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,127,0.012837332983811697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,127,0.010944000134865442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,127,0.011226666470368704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,127,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,127,0.010821333775917688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,127,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,127,0.011152000476916632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,127,0.012853333105643591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,255,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,255,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,255,0.011989332735538483
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,255,0.010837333897749582
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,255,0.012928000340859095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,255,0.010826667149861654
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,255,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,255,0.012879999975363413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,255,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,255,0.011407999942700068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,255,0.010949333508809408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,255,0.011477333803971609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,255,0.011328000575304031
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,255,0.011317333827416102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,511,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,511,0.01526933287580808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,511,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,511,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,511,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,255,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,511,0.012906666845083237
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,511,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,511,0.015029333531856537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,511,0.013893333574136099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,255,0.011237333218256632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,511,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,511,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,511,0.011066666493813196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,511,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,511,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,1023,0.021231998999913532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,511,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,1023,0.017290666699409485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,1023,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,1023,0.015061333775520325
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,1023,0.013658666362365087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,1023,0.015237333873907724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,1023,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,1023,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,1023,0.017765333255132038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,1023,0.015274666249752045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,511,0.012826666235923767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,1023,0.013237333546082178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,1023,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,1023,0.013440000514189402
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,1023,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,2047,0.027615999182065327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,2047,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,1023,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,2047,0.017456000049908955
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,2047,0.015178666760524115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,2047,0.015423999478419622
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,2047,0.01568000018596649
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,2047,0.015365333606799444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,2047,0.02146133283774058
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,2047,0.015834666788578033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,2047,0.01746133342385292
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,1023,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,2047,0.01498666654030482
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,2047,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,2047,0.01368533323208491
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,2047,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,2047,0.015413332730531693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,2047,0.017136000096797943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,4095,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,4095,0.029733332494894665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,4095,0.023898666103680927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,4095,0.019381333142518997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,4095,0.01889066646496455
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,4095,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,4095,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,4095,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,4095,0.02146133283774058
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,4095,0.021194666624069214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,4095,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,4095,0.017194667210181553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,4095,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,4095,0.017279999951521557
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,4095,0.015493333339691162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,8191,0.030074665943781536
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,8191,0.030026666820049286
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,8191,0.03268266717592875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,8191,0.02552533398071925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,8191,0.023647998770078022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,8191,0.023418667415777843
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,8191,0.023743999501069386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,8191,0.023445333043734234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,4095,0.016565332810084026
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,8191,0.023354666928450268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,8191,0.023434666295846302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,8191,0.020442667106787365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,8191,0.019120000302791595
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,8191,0.02094399929046631
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,8191,0.019152000546455383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,8191,0.019359999646743137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,8191,0.027280000348885853
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,16383,0.04929600159327189
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,16383,0.058464000622431435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,16383,0.04888000090916952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,16383,0.0479360024134318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,16383,0.046906664967536926
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,16383,0.04693333307902018
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,16383,0.046021332343419395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,16383,0.03160533308982849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,16383,0.029472000896930695
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,16383,0.03177600105603536
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,16383,0.02518400053183238
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,16383,0.027349332968393963
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,16383,0.024325333535671234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,16383,0.023120000958442688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,16383,0.02348800003528595
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,32767,0.0737066666285197
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,32767,0.07678933441638947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,16383,0.05215999980767568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,32767,0.09431466460227966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,32767,0.07217599948247273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,32767,0.07177599767843883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,32767,0.07055999835332234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,32767,0.07039999961853027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,32767,0.049029335379600525
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,32767,0.048325334986050926
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,32767,0.05203733344872793
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,32767,0.041221333046754204
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,32767,0.03988266736268997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,32767,0.040762667854626976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,32767,0.07021866738796234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,32767,0.03979199876387914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,32767,0.03994133323431015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,65535,0.11939733227094014
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,65535,0.1271573305130005
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,65535,0.16141866644223532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,65535,0.11852799852689107
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,65535,0.1199679970741272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,65535,0.11812800168991089
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,65535,0.11823466420173645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,65535,0.1186346709728241
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,65535,0.07291733225186665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,65535,0.07020799815654755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,65535,0.08224000036716461
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,65535,0.061861331264177956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,65535,0.06261333326498668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,65535,0.0611413319905599
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,8,1,1,131071,0.21633066733678183
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,8,1,2,131071,0.2380160093307495
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,65535,0.06230400005976359
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,65535,0.06098666787147522
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,8,1,4,131071,0.299621323744456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,8,1,8,131071,0.22708799441655478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,8,1,16,131071,0.22659732898076376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,8,1,32,131071,0.22684800624847412
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,8,1,64,131071,0.22608532508214316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,8,1,128,131071,0.22606933116912842
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,8,1,1,131071,0.11964266498883565
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,8,1,2,131071,0.1146506667137146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,8,1,4,131071,0.14447466532389322
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,8,1,8,131071,0.10442133744557698
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,8,1,16,131071,0.1027733286221822
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,8,1,32,131071,0.1029919981956482
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,8,1,64,131071,0.10380799571673076
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,8,1,128,131071,0.10325866937637329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1,0.012960000584522883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1,0.011237333218256632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1,0.010933333386977514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,1,0.011242666592200598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1,0.011605333536863327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1,0.011231999844312668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1,0.01121066634853681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1,0.01121066634853681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1,0.011584000041087469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1,0.015392000476519266
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,1,0.011381333072980246
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,3,0.01578666642308235
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,3,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,3,0.013594667116800943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,3,0.01267733300725619
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1,0.01339200014869372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,3,0.012778667112191519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,3,0.011530666301647821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,3,0.012639999389648438
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,3,0.013477332890033722
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,3,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,3,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,3,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,3,0.01099733387430509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,3,0.010992000500361124
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,3,0.01108266661564509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,3,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,7,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,3,0.011711999773979187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,7,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,7,0.011109333485364914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,7,0.012714666624863943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,7,0.01128000020980835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,7,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,7,0.014015999933083853
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,7,0.012949333836634954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,7,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,7,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,7,0.012815999488035837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,7,0.011962667107582092
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,7,0.011578666667143503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,7,0.011391999820868174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,7,0.01097600037852923
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,15,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,15,0.014890667051076889
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,15,0.013647999614477158
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,15,0.012800000607967377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,15,0.017386666188637417
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,15,0.011285333583752314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,15,0.012789333860079447
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,15,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,7,0.01121066634853681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,15,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,15,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,15,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,15,0.01129066695769628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,15,0.010842667271693548
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,15,0.011461333682139715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,31,0.01594666639963786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,15,0.014069333672523499
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,31,0.013248000293970108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,31,0.013418667018413544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,31,0.013349333157142004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,31,0.011034666250149408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,31,0.012186666329701742
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,31,0.011578666667143503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,15,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,31,0.015418666104475657
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,31,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,31,0.012863999853531519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,31,0.011141333729028702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,31,0.011087999989589056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,31,0.010874666273593903
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,31,0.011045332998037338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,31,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,63,0.015450666348139444
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,63,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,63,0.013093333691358566
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,63,0.012543999900420507
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,63,0.011274666835864386
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,63,0.012975999464591345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,63,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,63,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,63,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,63,0.013631999492645264
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,63,0.012383999923865
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,63,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,63,0.011370666325092316
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,63,0.010751999914646149
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,63,0.011071999867757162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,63,0.010922666639089584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,127,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,127,0.014864000181357065
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,31,0.011146667102972666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,127,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,127,0.011007999380429586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,127,0.01301866645614306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,127,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,127,0.014661333213249842
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,127,0.0129120002190272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,127,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,127,0.010890666395425797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,127,0.012821332861979803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,127,0.011413333316644033
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,127,0.01121066634853681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,127,0.01099733387430509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,127,0.010821333775917688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,255,0.016656000167131424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,255,0.013167999684810638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,255,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,127,0.011498666057984034
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,255,0.013061333447694778
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,255,0.015311999867359797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,255,0.011674666156371435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,255,0.012960000584522883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,255,0.011333333949247995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,255,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,255,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,255,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,255,0.010981333752473196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,255,0.011055999745925268
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,255,0.011205332974592844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,255,0.011130666981140772
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,255,0.011087999989589056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,511,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,511,0.01754666616519292
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,511,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,511,0.013616000612576803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,511,0.01350933313369751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,511,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,511,0.013445333888133367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,511,0.015520000209410986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,511,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,511,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,511,0.01320533330241839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,511,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,511,0.012869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,511,0.012901333471139273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,511,0.012746666868527731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,1023,0.025946666797002155
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,1023,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,1023,0.018618666877349217
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,1023,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,1023,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,1023,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,511,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,1023,0.015125333021084467
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,1023,0.015557333827018738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,1023,0.01950399950146675
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,1023,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,1023,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,1023,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,1023,0.013386666774749756
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,1023,0.012863999853531519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,1023,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,1023,0.012938667088747025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,2047,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,2047,0.027210667729377747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,2047,0.02271999915440877
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,2047,0.019050666441520054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,2047,0.01740266631046931
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,2047,0.017616000026464462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,2047,0.022298666338125866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,2047,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,2047,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,2047,0.017535999417304993
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,2047,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,2047,0.01718933383623759
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,2047,0.014266667266686758
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,2047,0.014885333677132925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,4095,0.027717334528764088
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,2047,0.015370666980743408
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,4095,0.02939733366171519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,2047,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,4095,0.021509334444999695
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,4095,0.02364266663789749
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,4095,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,4095,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,4095,0.023423999547958374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,4095,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,4095,0.0233599990606308
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,4095,0.021168000996112823
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,4095,0.01937599976857503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,4095,0.017701332767804463
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,4095,0.019621333728233974
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,4095,0.02942933390537898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,4095,0.01736533393462499
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,8191,0.0495413343111674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,8191,0.05142400165398916
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,8191,0.05548266569773356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,8191,0.047279998660087585
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,8191,0.045328001181284584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,8191,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,4095,0.016997333616018295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,8191,0.04529066880544027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,8191,0.03178133318821589
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,8191,0.02775466690460841
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,8191,0.029135999580224354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,8191,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,8191,0.021738665799299877
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,8191,0.04594666759173075
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,8191,0.021514666577180225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,8191,0.021503999829292297
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,16383,0.07389866809050243
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,16383,0.09407466650009155
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,16383,0.07487999896208446
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,16383,0.07417599856853485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,16383,0.07292800148328145
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,16383,0.07252266506354015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,16383,0.07745066781838734
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,16383,0.07157333195209503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,16383,0.048783997694651283
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,8191,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,16383,0.048298666874567665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,16383,0.049829334020614624
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,16383,0.039546666045983635
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,16383,0.039887999494870506
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,16383,0.03973866750796636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,16383,0.03973866750796636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,16383,0.039434666434923805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,32767,0.12657599647839865
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,32767,0.12640532851219177
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,32767,0.1698346734046936
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,32767,0.12155200044314067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,32767,0.1202186644077301
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,32767,0.11980799833933513
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,32767,0.12157866358757019
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,32767,0.11972266435623169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,32767,0.07391466697057088
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,32767,0.07256533205509186
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,32767,0.08344533046086629
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,32767,0.06278933087984721
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,32767,0.06262399752934773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,32767,0.062405332922935486
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,32767,0.06225066880385081
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,32767,0.06182399888833364
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,65535,0.22684266169865927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,65535,0.22593599557876587
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,65535,0.3203360040982564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,65535,0.21900266408920288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,65535,0.21798932552337646
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,65535,0.21708800395329794
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,65535,0.2176533341407776
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,65535,0.2164799968401591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,65535,0.12653866410255432
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,65535,0.11512000362078349
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,65535,0.15229333440462747
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,65535,0.1039573351542155
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,65535,0.10342400272687276
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,65535,0.10328533252080281
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,65535,0.10376532872517903
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,65535,0.10329066713651021
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,16,1,1,131071,0.4309013287226359
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,16,1,2,131071,0.4280053377151489
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,16,1,4,131071,0.6231786807378134
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,16,1,8,131071,0.415120005607605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,16,1,16,131071,0.4131466547648112
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,16,1,32,131071,0.4129386742909749
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,16,1,64,131071,0.414250651995341
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,16,1,128,131071,0.4126559893290202
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,16,1,1,131071,0.22763733069101968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,16,1,2,131071,0.20303465922673544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,16,1,4,131071,0.2911626696586609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,16,1,16,131071,0.18617600202560425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,16,1,32,131071,0.1871359944343567
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,16,1,64,131071,0.18585066000620523
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1,0.015008000036080679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,16,1,128,131071,0.18570667505264282
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1,0.016143999993801117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1,0.013199999928474426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1,0.012858666479587555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,16,1,8,131071,0.18774932622909546
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1,0.014746667196353277
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,1,0.012869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1,0.013653332988421122
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1,0.011253333340088526
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1,0.012831999609867731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1,0.011018666128317514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,1,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,3,0.014997333288192749
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1,0.012981332838535309
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,3,0.01573866605758667
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,3,0.014560000350077948
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,3,0.012901333471139273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,3,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,3,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,3,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,3,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,3,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,3,0.013408000270525614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,3,0.011365332951148352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,3,0.013167999684810638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,3,0.013082666943470636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,3,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,7,0.013776000589132309
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,7,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,7,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,7,0.013189333180586496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,3,0.010944000134865442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,7,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,7,0.012896000097195307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,7,0.013408000270525614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,7,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,7,0.013455999394257864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,7,0.012890666723251343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,7,0.01309866706530253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,3,0.01341333364446958
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,7,0.012362666428089142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,7,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,7,0.01118933285276095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,7,0.01119999960064888
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,7,0.013088000317414602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,15,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,15,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,15,0.013573333621025085
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,15,0.013637332866589228
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,15,0.012853333105643591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,15,0.01333333303531011
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,15,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,15,0.015146666516860327
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,15,0.013157332936922709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,15,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,15,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,15,0.01110400011142095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,15,0.012975999464591345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,15,0.011264000087976456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,15,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,31,0.014991999914248785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,31,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,31,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,31,0.013072000195582708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,31,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,31,0.012928000340859095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,31,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,31,0.014975999792416891
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,31,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,31,0.014005333185195923
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,31,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,15,0.012837332983811697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,31,0.01328533391157786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,31,0.012714666624863943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,31,0.012975999464591345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,31,0.011157333850860596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,63,0.01509333277742068
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,63,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,31,0.012949333836634954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,63,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,63,0.012906666845083237
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,63,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,63,0.013088000317414602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,63,0.015568000574906668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,63,0.013845333208640417
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,63,0.01349866638580958
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,63,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,63,0.012901333471139273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,63,0.01126933346192042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,63,0.01268799975514412
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,63,0.011642667154471079
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,63,0.011125333607196808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,127,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,127,0.01522133375207583
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,63,0.013616000612576803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,127,0.012869333227475485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,127,0.012810666114091873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,127,0.013173333058754602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,127,0.012863999853531519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,127,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,127,0.013232000172138214
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,127,0.013034666577974955
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,127,0.013242666920026144
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,127,0.013050666699806849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,127,0.013264000415802002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,127,0.01129066695769628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,127,0.012810666114091873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,127,0.012698666503032049
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,255,0.021327999730904896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,255,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,255,0.014943999548753103
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,255,0.013338666409254074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,255,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,255,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,255,0.01292266696691513
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,255,0.012757333616415659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,127,0.01525866612792015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,255,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,255,0.013167999684810638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,255,0.011551999797423681
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,255,0.011535999675591787
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,255,0.011648000528415045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,255,0.02141333371400833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,255,0.012757333616415659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,511,0.01945066700379054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,511,0.017103999853134155
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,511,0.015509333461523056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,511,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,255,0.011354666203260422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,511,0.021583999196688335
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,511,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,511,0.0216799999276797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,511,0.015610666324694952
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,511,0.01571200042963028
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,511,0.015018666783968607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,511,0.013877333452304205
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,511,0.015135999768972397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,511,0.013290667285521826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,511,0.013605333864688873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,511,0.015119999647140503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,511,0.015109332899252573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,1023,0.025727999707063038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,1023,0.021157334248224895
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,1023,0.017450666675964992
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,1023,0.01699200024207433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,1023,0.01710933322707812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,1023,0.017050666113694508
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,1023,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,1023,0.0232640008131663
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,1023,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,1023,0.017242666333913803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,1023,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,1023,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,1023,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,1023,0.013007999708255133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,1023,0.014981333166360855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,2047,0.03054400036732356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,2047,0.027466667195161183
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,2047,0.02869333326816559
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,1023,0.025493333737055462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,2047,0.02276266614596049
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,2047,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,2047,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,2047,0.025605333348115284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,2047,0.02170666555563609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,2047,0.021359999974568684
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,2047,0.01706133286158244
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,2047,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,2047,0.015066667149464289
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,2047,0.02125866711139679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,2047,0.015706667055686314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,2047,0.015002666662136713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,4095,0.05017599960168203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,4095,0.056757330894470215
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,4095,0.04414399961630503
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,2047,0.021205333371957142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,4095,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,4095,0.043525333205858864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,4095,0.050986667474110924
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,4095,0.0347680002450943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,4095,0.0278613343834877
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,4095,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,4095,0.02163200080394745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,4095,0.02080533280968666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,4095,0.043840001026789345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,4095,0.019424000134070713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,4095,0.02093333254257838
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,4095,0.021066665649414062
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,8191,0.07623466849327087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,8191,0.07762133578459422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,8191,0.09435199697812398
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,8191,0.07217599948247273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,8191,0.07050666709740956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,8191,0.07143466671307881
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,8191,0.07029866675535838
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,4095,0.029648000995318096
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,8191,0.052517334620157875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,8191,0.048298666874567665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,8191,0.05054933329423269
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,8191,0.03884266565243403
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,8191,0.03819733361403147
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,8191,0.036831999818483986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,8191,0.036730666955312095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,8191,0.0690719981988271
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,8191,0.03734400123357773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,16383,0.12826666235923767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,16383,0.17141334215799967
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,16383,0.12255466977755229
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,16383,0.12180266777674358
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,16383,0.12159466743469238
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,16383,0.1304586629072825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,16383,0.12426666418711345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,16383,0.0778186668952306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,16383,0.07048533360163371
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,16383,0.08470933636029561
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,16383,0.06009600063165029
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,16383,0.058917333682378135
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,16383,0.1225279966990153
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,16383,0.058549334605534874
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,16383,0.059194669127464294
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,32767,0.22761066754659018
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,32767,0.23150932788848877
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,32767,0.3187359968821208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,32767,0.22493332624435425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,32767,0.223578671614329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,32767,0.22588799397150675
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,32767,0.22390933831532797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,32767,0.22347732384999594
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,16383,0.05899733304977417
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,32767,0.11927466591199239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,32767,0.15406399965286255
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,32767,0.10578133662541707
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,32767,0.10397866368293762
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,32767,0.10418132940928142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,32767,0.10379200180371602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,32767,0.10368000467618306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,65535,0.43375468254089355
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,32767,0.1299679974714915
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,65535,0.6223413149515787
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,65535,0.42846934000651044
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,65535,0.42738668123881024
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,65535,0.4267359972000122
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,65535,0.42399998505910236
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,65535,0.4343893527984619
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,65535,0.42793599764506024
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,65535,0.22845866282780966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,65535,0.21385065714518228
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,65535,0.29071466128031415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,65535,0.19150400161743164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,65535,0.19075733423233032
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,65535,0.19100799163182577
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,65535,0.19042134284973145
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,65535,0.18953599532445273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,32,1,1,131071,0.8407573699951172
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,32,1,2,131071,0.8522506554921468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,32,1,4,131071,1.2270399729410808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,32,1,8,131071,0.8286773363749186
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,32,1,16,131071,0.8286026318868002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,32,1,32,131071,0.8271626631418864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,32,1,64,131071,0.8260479768117269
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,32,1,128,131071,0.8223573366800944
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,32,1,1,131071,0.43352532386779785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,32,1,2,131071,0.3997279802958171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,32,1,4,131071,0.5593173503875732
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,32,1,8,131071,0.36588799953460693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,32,1,16,131071,0.36429333686828613
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,32,1,32,131071,0.3654346863428752
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,32,1,64,131071,0.36473600069681805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1,0.01639466608564059
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1,0.013248000293970108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1,0.01404800017674764
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1,0.013199999928474426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1,0.013786666095256805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,32,1,128,131071,0.3641333182652791
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1,0.01695466662446658
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1,0.01481066644191742
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1,0.012965332716703415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1,0.012981332838535309
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1,0.012602667013804117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1,0.012736000120639801
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1,0.011509332805871964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,1,0.012991999586423239
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,3,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,3,0.014922666052977243
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,3,0.014959999670584997
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,3,0.014287999520699183
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1,0.015050667027632395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,3,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,3,0.013130666067202887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,3,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,3,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,3,0.01463466634353002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,3,0.012928000340859095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,3,0.012815999488035837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,3,0.013077333569526672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,3,0.011077333241701126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,3,0.012810666114091873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,3,0.011653333902359009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,7,0.014970666418472925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,1,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,7,0.014917333920796713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,7,0.013647999614477158
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,7,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,7,0.013466666142145792
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,7,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,3,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,7,0.017658667018016178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,7,0.013973332941532135
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,7,0.015184000134468079
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,7,0.013365333278973898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,7,0.013280000537633896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,7,0.014858666807413101
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,7,0.013237333546082178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,7,0.012874666601419449
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,15,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,15,0.015141333142916361
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,15,0.01685333376129468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,15,0.013861333330472311
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,7,0.013173333058754602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,15,0.013663999736309052
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,15,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,15,0.01368533323208491
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,15,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,15,0.01709866647919019
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,15,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,7,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,15,0.01310933381319046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,15,0.013023999830087027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,15,0.012453333785136541
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,15,0.01331199953953425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,15,0.01322666679819425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,15,0.013066666821638743
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,31,0.01618133361140887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,31,0.012885333349307379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,31,0.013088000317414602
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,31,0.012842666357755661
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,31,0.013274667163689932
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,31,0.01524266724785169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,31,0.01301866645614306
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,31,0.017711999515692394
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,31,0.015461333096027374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,31,0.013674666484196981
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,31,0.015482666591803232
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,31,0.013151999562978745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,31,0.012965332716703415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,31,0.012847999731699625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,31,0.01314666618903478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,63,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,31,0.01303999995191892
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,63,0.01562133307258288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,63,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,63,0.013397333522637686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,63,0.013674666484196981
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,63,0.013946666071812311
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,63,0.01302933320403099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,63,0.017504000415404636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,63,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,63,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,63,0.01422400027513504
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,63,0.01370666672786077
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,63,0.013013333082199097
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,63,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,127,0.015418666104475657
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,127,0.015024000157912573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,63,0.012853333105643591
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,127,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,127,0.014826666563749313
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,127,0.012960000584522883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,63,0.013258667041858038
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,127,0.013679999858140945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,127,0.015706667055686314
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,127,0.01551466683546702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,127,0.015263999501864115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,127,0.013317332913478216
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,127,0.01320533330241839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,127,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,127,0.013151999562978745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,127,0.01228800043463707
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,255,0.01703466723362605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,255,0.02143999934196472
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,127,0.012928000340859095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,255,0.014645333091417948
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,127,0.0170666662355264
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,255,0.013584000368913015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,255,0.013989333063364029
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,255,0.013839999834696451
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,255,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,255,0.01926933353145917
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,255,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,255,0.013338666409254074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,255,0.014576000471909841
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,255,0.013045333325862885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,255,0.012106666962305704
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,255,0.013557333499193192
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,255,0.012815999488035837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,511,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,511,0.025722667574882507
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,511,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,255,0.012970666090647379
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,511,0.01811733345190684
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,511,0.017269333203633625
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,511,0.016879999389251072
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,511,0.01716800034046173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,511,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,511,0.023381332556406658
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,511,0.015376000354687372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,511,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,511,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,511,0.013248000293970108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,511,0.013301332791646322
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,511,0.013978666315476099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,1023,0.02363733450571696
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,1023,0.02959466725587845
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,1023,0.030447999636332195
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,1023,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,1023,0.019573333362738293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,1023,0.02117866774400075
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,1023,0.01961600035429001
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,1023,0.019434666881958645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,1023,0.023285334308942158
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,1023,0.0242399995525678
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,1023,0.019472000499566395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,1023,0.017562666287024815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,1023,0.015471999843915304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,1023,0.015194666882356008
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,1023,0.015216000378131866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,511,0.01937066639463107
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,1023,0.01573866605758667
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,2047,0.044954667488733925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,2047,0.05314666529496511
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,2047,0.05478399991989136
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,2047,0.045706664522488914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,2047,0.042650664846102394
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,2047,0.04359999795754751
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,2047,0.04321600000063578
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,2047,0.02943466603755951
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,2047,0.029637334247430164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,2047,0.0258240004380544
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,2047,0.021418665846188862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,2047,0.019461333751678467
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,2047,0.04378133515516917
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,2047,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,2047,0.01923199991385142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,2047,0.018960000326236088
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,4095,0.07135466734568278
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,4095,0.09077866872151692
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,4095,0.07271466652552287
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,4095,0.07221866647402446
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,4095,0.07180800040562947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,4095,0.07874666651089986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,4095,0.07171733180681865
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,4095,0.06901333232720692
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,4095,0.04770666857560476
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,4095,0.051914667089780174
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,4095,0.05041066805521647
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,4095,0.037962667644023895
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,4095,0.038149334490299225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,4095,0.03764266769091288
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,8191,0.1239306628704071
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,8191,0.13309866189956665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,4095,0.039077334105968475
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,8191,0.12564266721407572
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,8191,0.12288000186284383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,8191,0.12141866485277812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,8191,0.12223466237386067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,4095,0.03734400123357773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,8191,0.12309867143630981
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,8191,0.07427733143170674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,8191,0.07480533421039581
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,8191,0.08396266897519429
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,8191,0.061162665486335754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,8191,0.05940799911816915
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,8191,0.05945600072542826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,8191,0.05898133416970571
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,8191,0.058373332023620605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,16383,0.22366933027903238
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,16383,0.2334346572558085
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,16383,0.3140160044034322
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,16383,0.23035200436909994
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,16383,0.22508267561594644
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,16383,0.22713599602381387
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,16383,0.22354666392008463
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,8191,0.16607466340065002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,16383,0.2246560057004293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,16383,0.12487467130025227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,16383,0.12308800220489502
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,16383,0.15250133474667868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,16383,0.10659733414649963
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,16383,0.10458133618036906
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,16383,0.10431999961535136
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,16383,0.10410133004188538
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,16383,0.1042133371035258
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,32767,0.4264533519744873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,32767,0.43678398927052814
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,32767,0.4291146596272786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,32767,0.4270933469136556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,32767,0.42470399538675946
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,32767,0.42553067207336426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,32767,0.42508800824483234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,32767,0.22502400477727255
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,32767,0.2141653299331665
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,32767,0.28853867451349896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,32767,0.19222400585810342
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,32767,0.6131413380304972
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,32767,0.19051732619603476
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,32767,0.19074134031931558
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,32767,0.18964266777038574
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,32767,0.1894986629486084
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,65535,0.8343946933746338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,65535,0.8515626589457194
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,65535,1.2082026799519856
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,65535,0.8294453620910645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,65535,0.8254079818725586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,65535,0.8244586785634359
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,65535,0.8250666459401449
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,65535,0.8247733116149902
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,65535,0.4285546541213989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,65535,0.40283199151357013
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,65535,0.5560319821039835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,65535,0.36690131823221844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,65535,0.36561067899068195
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,65535,0.3648480176925659
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,65535,0.3650826613108317
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,65535,0.3642186721165975
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,64,1,1,131071,1.653205394744873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,64,1,2,131071,1.6592532793680828
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,64,1,4,131071,2.402751922607422
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,64,1,8,131071,1.6218560536702473
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,64,1,16,131071,1.6166666348775227
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,64,1,32,131071,1.6193493207295735
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,64,1,64,131071,1.611786683400472
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,64,1,1,131071,0.8319946924845377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,64,1,128,131071,1.6161173184712727
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,64,1,2,131071,0.7796746889750162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,64,1,4,131071,1.0998293558756511
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,64,1,8,131071,0.7163840134938558
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,64,1,16,131071,0.7142613728841146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,64,1,32,131071,0.7154133319854736
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,64,1,64,131071,0.7133599917093912
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1,0.017055999487638474
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1,0.021365332106749218
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,64,1,128,131071,0.7135573228200277
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1,0.015322666615247726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1,0.014736000448465347
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,1,0.014778666198253632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1,0.025248001019159954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1,0.017488000293572743
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1,0.014767999450365702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1,0.013194666554530462
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1,0.015957333147525787
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1,0.012965332716703415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,1,0.012954667210578918
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,3,0.020026666422684986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,3,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,3,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1,0.019253333409627277
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,3,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,3,0.014965333044528961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,3,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,3,0.025759999950726826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,3,0.017573333034912746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,3,0.016832000265518825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,3,0.01402666668097178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,3,0.01357866699496905
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,3,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,3,0.013717333475748697
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,3,0.013183999806642532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,3,0.013125333935022354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,7,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,7,0.01730666682124138
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,7,0.021946666141351063
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,7,0.0169813334941864
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,7,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,7,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,7,0.01515199989080429
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,3,0.019199999670187633
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,7,0.02515200028816859
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,7,0.01915733392039935
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,7,0.01933866615096728
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,7,0.01331199953953425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,7,0.013605333864688873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,7,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,7,0.015157333264748255
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,15,0.01970133309563001
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,15,0.017642666896184284
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,15,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,15,0.01575999955336253
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,7,0.012986666212479273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,7,0.013178666432698568
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,15,0.015226667126019796
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,15,0.015098666151364645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,15,0.015279999623696009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,15,0.02515733242034912
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,15,0.017045332739750545
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,15,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,15,0.014773332824309668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,15,0.012917333592971167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,15,0.013237333546082178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,15,0.013269333789745966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,31,0.019402666638294857
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,31,0.01725333308180173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,31,0.021397332350413006
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,15,0.014666666587193808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,31,0.01505600040157636
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,31,0.014848000059525171
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,31,0.015077333897352219
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,31,0.014853333433469137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,31,0.025450666745503742
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,31,0.017301333447297413
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,31,0.019626667102177937
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,15,0.013104000439246496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,31,0.01328533391157786
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,31,0.013210666676362356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,31,0.012997332960367203
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,31,0.013370666652917862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,63,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,63,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,63,0.021781332790851593
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,63,0.016970666746298473
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,31,0.016965333372354507
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,63,0.01492799942692121
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,63,0.015290666371583939
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,31,0.013445333888133367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,63,0.023056000471115112
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,63,0.01729600007335345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,63,0.018863999595244724
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,63,0.013354666531085968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,63,0.013450667262077332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,63,0.015295999745527903
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,63,0.013002666334311167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,63,0.02548266698916753
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,63,0.01312000056107839
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,63,0.01292266696691513
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,127,0.017237332959969837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,127,0.02161066730817159
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,127,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,127,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,127,0.015333333363135656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,127,0.019546666493018467
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,127,0.015013333410024643
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,127,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,127,0.025237334271272022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,127,0.017866666118303936
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,127,0.019413333386182785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,127,0.013450667262077332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,127,0.01331199953953425
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,127,0.013141332815090815
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,127,0.013114667187134424
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,127,0.012874666601419449
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,255,0.023541333774725597
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,255,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,255,0.021920000513394673
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,255,0.015306666493415833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,255,0.015189333508412043
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,255,0.015200000256299973
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,255,0.015599999576807022
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,255,0.02759466568628947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,255,0.01959466685851415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,255,0.019280000279347103
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,255,0.01321600005030632
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,255,0.01523200049996376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,255,0.013066666821638743
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,255,0.013306666165590286
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,255,0.013461332768201828
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,511,0.031285333136717476
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,511,0.03081600119670232
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,511,0.019519999623298645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,255,0.013269333789745966
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,511,0.019002666076024372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,511,0.018511999398469925
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,511,0.018133333573738735
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,511,0.019205333044131596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,511,0.03044266750415166
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,511,0.021557333568731945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,511,0.015210667004187902
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,511,0.015301333119471868
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,511,0.015498666713635126
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,511,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,511,0.01516266663869222
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,511,0.021903999149799347
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,1023,0.04857600231965383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,1023,0.05421866476535797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,1023,0.043807998299598694
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,511,0.01532799998919169
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,1023,0.04154133299986521
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,1023,0.041834667325019836
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,1023,0.048512001832326256
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,1023,0.04101333270470301
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,1023,0.04079466561476389
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,1023,0.02733866622050603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,1023,0.03143466760714849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,1023,0.01966399947802226
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,1023,0.019178666174411774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,1023,0.01921066641807556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,1023,0.019248000035683315
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,2047,0.07770666480064392
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,2047,0.07378666599591573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,2047,0.09531199932098389
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,1023,0.04173333446184794
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,2047,0.06810133159160614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,2047,0.06886399785677592
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,2047,0.06914666791756947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,2047,0.06756799916426341
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,2047,0.05650666852792104
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,2047,0.048026666045188904
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,2047,0.05375466744105021
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,2047,0.03852266569932302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,2047,0.03782933453718821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,1023,0.019733333339293797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,2047,0.037274666130542755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,2047,0.07116800049940745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,2047,0.036831999818483986
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,2047,0.036159999668598175
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,4095,0.1276533305644989
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,4095,0.1269813378651937
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,4095,0.16832532485326132
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,4095,0.12072533369064331
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,4095,0.11822400490442912
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,4095,0.11891200145085652
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,4095,0.11886399984359741
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,4095,0.11940266688664754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,4095,0.0707893321911494
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,4095,0.08865066369374593
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,4095,0.06026133398214976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,4095,0.058304001887639366
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,4095,0.05791999896367391
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,4095,0.05959466596444448
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,4095,0.08340266346931458
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,4095,0.05788266658782959
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,8191,0.2313973307609558
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,8191,0.22700266043345133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,8191,0.3184960087140401
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,8191,0.22131733099619547
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,8191,0.2200160026550293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,8191,0.2206826607386271
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,8191,0.2187839945157369
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,8191,0.222271998723348
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,8191,0.1350986659526825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,8191,0.11890133221944173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,8191,0.15545599659283957
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,8191,0.10388799508412679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,8191,0.1036906639734904
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,8191,0.10296533505121867
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,8191,0.10548266768455505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,8191,0.10377599795659383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,16383,0.43466134866078693
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,16383,0.4330293337504069
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,16383,0.6175839900970459
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,16383,0.4245599905649821
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,16383,0.4209173520406087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,16383,0.41787731647491455
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,16383,0.4187946716944377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,16383,0.23693867524464926
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,16383,0.21196800470352173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,16383,0.2942879994710286
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,16383,0.19088000059127808
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,16383,0.42100266615549725
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,16383,0.19020267327626547
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,16383,0.18917866547902426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,16383,0.1885813275973002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,16383,0.19034665822982788
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,32767,0.842298666636149
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,32767,0.8460000356038412
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,32767,1.2531466484069824
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,32767,0.8242560227711996
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,32767,0.8189280033111572
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,32767,0.8193066914876302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,32767,0.8205066521962484
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,32767,0.8157066504160563
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,32767,0.3997386693954468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,32767,0.5659466584523519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,32767,0.43984532356262207
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,32767,0.36506132284800213
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,32767,0.36423468589782715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,32767,0.36422932147979736
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,32767,0.36419200897216797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,32767,0.3627946774164836
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,65535,1.6648160616556804
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,65535,1.6536426544189453
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,65535,1.6123092969258626
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,65535,2.446730613708496
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,65535,1.5977600415547688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,65535,1.597599983215332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,65535,1.6031306584676106
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,65535,0.8473119735717773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,65535,1.5995786984761555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,65535,0.777786652247111
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,65535,1.119909365971883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,65535,0.7138613065083822
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,65535,0.7118399937947592
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,65535,0.7120800018310547
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,65535,0.7141973177591959
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,65535,0.7119519710540771
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,128,1,1,131071,3.3006505966186523
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,128,1,2,131071,3.364858627319336
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,128,1,4,131071,4.9202985763549805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,128,1,8,131071,3.2522560755411782
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,128,1,16,131071,3.2365760803222656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,128,1,32,131071,3.238032023111979
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,128,1,64,131071,3.242645263671875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,128,1,128,131071,3.2406721115112305
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,128,1,1,131071,1.6563146909077961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,128,1,2,131071,1.525269349416097
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,128,1,8,131071,1.4102080663045247
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,128,1,4,131071,2.2610559463500977
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,128,1,16,131071,1.4105013211568196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,128,1,32,131071,1.4048585891723633
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1,0.029279999434947968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1,0.024165332317352295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1,0.03355200091997782
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,128,1,64,131071,1.4070773124694824
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1,0.022255999346574146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1,0.021253332495689392
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,128,1,128,131071,1.4055360158284504
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1,0.020928000410397846
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1,0.02021866664290428
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1,0.04307200014591217
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1,0.025333332518736523
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1,0.02784000088771184
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1,0.01916266605257988
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1,0.016757333030303318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,1,0.021290667355060577
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,1,0.015722667177518208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,3,0.029877332349618275
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,3,0.023189333577950794
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1,0.016415999581416447
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,3,0.03154666721820831
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,3,0.02109866589307785
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1,0.016389333953460056
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,3,0.01942933350801468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,3,0.02080533280968666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,3,0.01982933282852173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,3,0.01932799940307935
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,3,0.02719466636578242
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,3,0.027744000156720478
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,3,0.01926400015751521
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,3,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,3,0.016437333077192307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,3,0.041637333730856575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,3,0.01599466676513354
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,7,0.029557332396507263
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,7,0.023141334454218548
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,7,0.031445334355036415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,7,0.021312000850836437
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,3,0.017322666943073273
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,7,0.01979200045267741
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,7,0.021354667842388153
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,7,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,7,0.02120000123977661
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,7,0.043578664461771645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,7,0.025461333493391674
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,7,0.02733866622050603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,7,0.01904533306757609
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,7,0.01714133347074191
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,7,0.015344000111023584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,7,0.01704000060757001
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,7,0.015168000012636185
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,15,0.022970666488011677
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,15,0.0323786661028862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,15,0.021322667598724365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,15,0.020053333292404812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,15,0.020432000358899433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,15,0.029711998999118805
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,15,0.02142400046189626
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,15,0.04181333382924398
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,15,0.02716800073782603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,15,0.0189280000825723
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,15,0.020645332833131153
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,15,0.01714666684468587
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,15,0.01687466725707054
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,15,0.01743999992807706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,15,0.025253333151340485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,15,0.017221332838137943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,31,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,31,0.03155199935038885
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,31,0.02124800036350886
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,31,0.020687999824682873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,31,0.021295999487241108
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,31,0.021488000949223835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,31,0.029861333469549816
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,31,0.021146667500336964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,31,0.04228800038496653
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,31,0.026346666117509205
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,31,0.027562665442625683
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,31,0.01966399947802226
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,31,0.017994667092959087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,31,0.017029333859682083
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,31,0.017130666722853977
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,31,0.01722666621208191
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,63,0.02937600016593933
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,63,0.023200000325838726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,63,0.022042666872342426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,63,0.021018666525681812
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,63,0.02075200031201045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,63,0.020554666717847187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,63,0.041850666205088295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,63,0.03162666658560435
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,63,0.025621332228183746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,63,0.02752000093460083
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,63,0.019237333287795384
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,63,0.02147199958562851
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,63,0.0173333336909612
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,63,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,127,0.03181866556406021
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,127,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,63,0.01735466718673706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,127,0.03200533241033554
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,127,0.02237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,127,0.021216000119845074
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,127,0.020421333611011505
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,127,0.020106667031844456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,127,0.04433066646258036
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,63,0.016506666938463848
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,127,0.026133333643277485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,127,0.027749332288901012
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,127,0.018922666708628338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,127,0.017594666530688603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,127,0.016751999656359356
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,127,0.017429333180189133
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,127,0.019189332922299702
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,127,0.016869333883126576
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,255,0.02951466788848241
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,255,0.03302400062481562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,255,0.021136000752449036
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,255,0.0210506667693456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,255,0.0194560003777345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,255,0.019658666104078293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,255,0.043738668163617454
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,255,0.02107200026512146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,255,0.046122665206591286
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,255,0.029690665503342945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,255,0.027562665442625683
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,255,0.016575999557971954
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,255,0.017082666357358296
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,255,0.017114666601022083
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,255,0.017338667064905167
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,511,0.05620799958705902
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,255,0.019194666296243668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,511,0.052341332038243614
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,511,0.059119999408721924
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,511,0.04515733321507772
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,511,0.04327466587225596
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,511,0.04277333120505015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,511,0.042821332812309265
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,511,0.041984001795450844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,511,0.056159997979799904
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,511,0.04065066576004028
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,511,0.03773866593837738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,511,0.02420799930890401
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,511,0.021370666722456615
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,511,0.01974933346112569
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,511,0.019274666905403137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,1023,0.0848640004793803
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,1023,0.07898133496443431
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,511,0.021498667697111767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,1023,0.070783997575442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,1023,0.06991466879844666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,1023,0.06898666421572368
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,1023,0.06773866713047028
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,1023,0.06844800213972728
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,1023,0.06965866684913635
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,1023,0.05481066803137461
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,1023,0.058143998185793556
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,1023,0.03826666623353958
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,1023,0.03809066613515218
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,1023,0.09623466928799947
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,1023,0.03698666642109553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,1023,0.037802666425704956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,1023,0.04072533299525579
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,2047,0.13699199755986533
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,2047,0.13085866967837015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,2047,0.171999990940094
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,2047,0.12238933642705281
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,2047,0.12076266606648763
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,2047,0.11880532900492351
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,2047,0.11994133392969768
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,2047,0.11915199955304463
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,2047,0.09775466720263164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,2047,0.09409067034721375
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,2047,0.0646613339583079
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,2047,0.06233066817124685
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,2047,0.06154666841030121
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,2047,0.06065600117047628
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,4095,0.24041066567103067
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,4095,0.2389226754506429
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,4095,0.3201013406117757
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,2047,0.06058666606744131
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,4095,0.22261333465576172
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,4095,0.22283732891082764
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,4095,0.22228266795476279
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,4095,0.22249066829681396
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,2047,0.08123733103275299
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,4095,0.2202613353729248
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,4095,0.150629331668218
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,4095,0.12916266918182373
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,4095,0.16301866372426352
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,4095,0.10947733124097188
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,4095,0.10603732864061992
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,4095,0.10556800166765849
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,4095,0.10498666763305664
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,4095,0.10411199927330017
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,8191,0.44647467136383057
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,8191,0.6202720006306967
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,8191,0.4262559811274211
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,8191,0.42337600390116376
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,8191,0.44361066818237305
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,8191,0.42601601282755536
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,8191,0.4233280022939046
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,8191,0.25391467412312824
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,8191,0.2251360019048055
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,8191,0.2983413338661194
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,8191,0.4233386516571045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,8191,0.19753599166870117
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,8191,0.193231999874115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,8191,0.19357866048812866
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,8191,0.19217065970102945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,8191,0.19217600425084433
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,16383,0.8577386538187662
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,16383,0.8583892981211344
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,16383,0.8283999760945638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,16383,1.2278079986572266
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,16383,0.8224426905314127
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,16383,0.8268640041351318
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,16383,0.8235519727071127
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,16383,0.8290239969889323
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,16383,0.458352009455363
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,16383,0.566927989323934
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,16383,0.37345067660013836
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,16383,0.36793065071105957
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,16383,0.4141013224919637
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,16383,0.36708799997965497
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,16383,0.3675520022710164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,16383,0.36640000343322754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,32767,1.7061492602030437
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,32767,2.507861296335856
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,32767,1.6846240361531575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,32767,1.6483519872029622
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,32767,1.6379094123840332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,32767,1.6453173955281575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,32767,1.6427839597066243
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,32767,0.8690773646036783
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,32767,0.7910772959391276
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,32767,1.6476426124572754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,32767,1.1214133103688557
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,32767,0.7235466639200846
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,32767,0.7191572984059652
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,32767,0.7190186977386475
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,32767,0.718010663986206
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,32767,0.7195573647816976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,256,1,1,65535,3.3347838719685874
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,256,1,2,65535,3.352927843729655
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,256,1,8,65535,3.248634656270345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,256,1,4,65535,5.205477396647136
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,256,1,16,65535,3.246255874633789
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,256,1,32,65535,3.232410748799642
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,256,1,64,65535,3.233994801839193
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,256,1,128,65535,3.229541460673014
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,256,1,1,65535,1.6892479260762532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,256,1,2,65535,1.547808011372884
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,256,1,8,65535,1.4285492897033691
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,256,1,4,65535,2.251610596974691
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,256,1,16,65535,1.4202240308125813
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,256,1,32,65535,1.421562671661377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1,0.04974933465321859
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1,0.035962666074434914
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1,0.05008533100287119
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,256,1,64,65535,1.4172852834065754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1,0.032885332902272545
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1,0.02977599948644638
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1,0.03028800090154012
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1,0.02940800040960312
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1,0.08017066617806752
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1,0.04602666695912679
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,256,1,128,65535,1.4216906229654949
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1,0.04528533418973287
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1,0.02741866558790207
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,1,0.029285334050655365
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1,0.023546665906906128
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,1,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,3,0.04916266600290934
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,3,0.03748800108830134
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,3,0.05009066561857859
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,3,0.03186133255561193
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,3,0.029616000751654308
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1,0.02298133323589961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,3,0.0296426663796107
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,3,0.02962133288383484
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,3,0.0296426663796107
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,3,0.0765226682027181
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,3,0.044693330923716225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,3,0.02731200059254964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,3,0.024165332317352295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,3,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,3,0.023152001202106476
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,3,0.046256000796953835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,7,0.05016533533732096
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,7,0.03639466563860575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,7,0.05054399867852529
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,7,0.03202133377393087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,3,0.023370665808518726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,7,0.029493334392706554
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,7,0.02961066613594691
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,7,0.030799999833106995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,7,0.07667199770609538
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,7,0.032287999987602234
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,7,0.046112000942230225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,7,0.04399999976158142
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,7,0.027456000447273254
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,7,0.024874667326609295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,7,0.02309333284695943
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,7,0.02316266546646754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,7,0.023397333920001984
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,15,0.03596800069014231
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,15,0.05077333251635233
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,15,0.03277866790692011
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,15,0.02926933268706004
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,15,0.029904000461101532
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,15,0.029690665503342945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,15,0.049957334995269775
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,15,0.07645333309968312
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,15,0.04598933458328247
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,15,0.04534933467706045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,15,0.027471999327341717
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,15,0.02332799881696701
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,15,0.02342933416366577
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,15,0.029631999631722767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,15,0.02345066765944163
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,31,0.04965866605440775
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,31,0.035631999373435974
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,31,0.04982399940490723
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,31,0.032314665615558624
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,31,0.030048000315825146
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,31,0.02959999938805898
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,31,0.030373332401116688
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,31,0.02956799914439519
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,31,0.07705600063006084
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,31,0.04620266457398733
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,31,0.04574933151404063
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,31,0.027237333357334137
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,31,0.025285333395004272
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,31,0.023408000667889912
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,31,0.023498666783173878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,31,0.02325333406527837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,15,0.023520000278949738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,63,0.04979733129342397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,63,0.04965866605440775
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,63,0.03356266766786575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,63,0.029722665747006733
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,63,0.029322666426499683
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,63,0.03067733347415924
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,63,0.030239999294281006
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,63,0.03736533224582672
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,63,0.04587733248869578
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,63,0.04596266647179922
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,63,0.027402666707833607
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,63,0.02481599897146225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,63,0.023232000569502514
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,63,0.023247999449570973
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,63,0.07482133309046428
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,127,0.05301333467165629
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,127,0.04369066655635834
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,127,0.05287466446558634
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,127,0.033887999753157295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,127,0.029520000020662945
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,63,0.023311999936898548
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,127,0.02934933453798294
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,127,0.030378667016824085
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,127,0.02945599953333537
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,127,0.07713599999745686
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,127,0.048485333720842995
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,127,0.02734400083621343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,127,0.023605334262053173
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,127,0.02350933353106181
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,127,0.02349333216746648
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,127,0.02309866746266683
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,127,0.04674666623274485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,255,0.05797333518664042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,255,0.05807999769846598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,255,0.04721599817276001
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,255,0.0432586669921875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,255,0.06584533552328746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,255,0.04284800092379252
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,255,0.08348799745241801
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,255,0.04160533348719279
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,255,0.05609600245952606
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,255,0.0480373352766037
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,255,0.042725334564844765
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,255,0.02346666653951009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,255,0.023242667317390442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,255,0.023530667026837666
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,511,0.08970666925112407
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,511,0.08807466427485149
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,511,0.09303999940554301
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,511,0.07295466462771098
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,511,0.06951466699441274
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,511,0.07238399982452393
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,255,0.029290666182835896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,511,0.06976533432801564
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,511,0.07128533224264781
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,255,0.023365333676338196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,511,0.09339732925097148
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,511,0.06845866640408833
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,511,0.06332799792289734
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,511,0.046181331078211464
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,511,0.04186666508515676
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,511,0.0396373321612676
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,511,0.039594667653242745
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,511,0.03984000037113825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,1023,0.15994133551915488
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,1023,0.14008532961209616
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,1023,0.16269333163897196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,1023,0.12809066971143088
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,1023,0.12434132893880208
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,1023,0.11990933616956075
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,1023,0.12179199854532878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,1023,0.12381333112716675
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,1023,0.1292746663093567
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,1023,0.09485866626103719
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,1023,0.09685867031415303
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,1023,0.0689386675755183
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,1023,0.06224533418814341
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,1023,0.06224533418814341
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,1023,0.06200533111890157
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,2047,0.23893866936365762
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,2047,0.2507893244425456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,1023,0.06449066599210103
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,2047,0.23149865865707397
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,2047,0.22852800289789835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,2047,0.22765332460403442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,2047,0.225055992603302
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,2047,0.3004639943440755
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,2047,0.22627200682957968
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,2047,0.1673333247502645
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,2047,0.1450933317343394
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,2047,0.1604746679464976
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,2047,0.11417067050933838
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,2047,0.10966933767000835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,2047,0.10752532879511516
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,2047,0.10668266812960307
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,2047,0.10680533448855083
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,4095,0.42546133200327557
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,4095,0.469487984975179
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,4095,0.5658880074818929
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,4095,0.4394826491673787
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,4095,0.43467732270558673
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,4095,0.43301331996917725
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,4095,0.43581334749857586
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,4095,0.4322613477706909
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,4095,0.26572267214457196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,4095,0.2439253330230713
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,4095,0.20442666610081991
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,4095,0.19817066192626953
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,4095,0.1979680061340332
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,4095,0.19536532958348593
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,4095,0.2844799955685933
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,4095,0.19575466712315878
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,8191,0.802127997080485
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,8191,0.8915839989980062
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,8191,1.0971519947052002
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,8191,0.8486346403757731
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,8191,0.8401226997375488
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,8191,0.8346133232116699
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,8191,0.8385226726531982
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,8191,0.8409653504689535
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,8191,0.45161600907643634
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,8191,0.43514132499694824
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,8191,0.5230186780293783
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,8191,0.38603734970092773
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,8191,0.3789653380711873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,8191,0.3763999938964844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,8191,0.37593066692352295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,8191,0.37491198380788165
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,16383,1.552677313486735
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,16383,1.7445707321166992
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,16383,1.6661492983500164
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,16383,2.271567980448405
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,16383,1.6579413414001465
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,16383,1.6515092849731445
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,16383,1.6583946545918782
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,16383,1.6504106521606445
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,16383,0.8205493291219076
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,16383,0.8189813296000162
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,16383,1.0056053002675374
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,16383,0.7416053613026937
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,16383,0.7359360059102377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,16383,0.7322399616241455
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,16383,0.7328800360361735
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,16383,0.7315786679585775
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,512,1,1,32767,3.056847890218099
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,512,1,2,32767,3.4879252115885415
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,512,1,8,32767,3.333573341369629
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,512,1,4,32767,4.673317273457845
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,512,1,16,32767,3.32806396484375
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,512,1,32,32767,3.3168748219807944
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,512,1,64,32767,3.3214133580525718
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,512,1,128,32767,3.3212639490763345
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,512,1,1,32767,1.5592800776163738
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,512,1,2,32767,1.591610590616862
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,512,1,8,32767,1.4548746744791667
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,512,1,4,32767,2.0496692657470703
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,512,1,16,32767,1.4514880180358887
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,512,1,32,32767,1.4509973526000977
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1,0.08302400012811025
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1,0.06073066592216492
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,512,1,64,32767,1.4469386736551921
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,512,1,128,32767,1.44758939743042
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1,0.05563200016816457
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1,0.04896533489227295
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1,0.04842133323351542
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1,0.04800533254941305
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,1,0.047824000318845115
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1,0.1402186652024587
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1,0.08074666559696198
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1,0.08298666775226593
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1,0.04570133487383524
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1,0.08947733044624329
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1,0.037615999579429626
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1,0.035818666219711304
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1,0.035642666121323906
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,1,0.035616000493367515
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,3,0.058335999647776283
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,3,0.08900800347328186
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,3,0.055914665261904396
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,3,0.047925333182017006
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,3,0.04779199759165446
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,3,0.0823520024617513
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,3,0.04791999856630961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,3,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,3,0.1402506629625956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,3,0.08255466818809509
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,3,0.08302933474381764
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,3,0.045994664231936135
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,3,0.0376800000667572
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,3,0.035690667728583016
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,3,0.03557866563399633
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,3,0.035743998984495796
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,7,0.08188266555468242
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,7,0.05996266504128774
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,7,0.09124799569447835
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,7,0.053904001911481224
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,7,0.048063998421033226
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,7,0.04790933430194855
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,7,0.047685335079828896
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,7,0.04789333542188009
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,7,0.1402186652024587
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,7,0.08225599924723308
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,7,0.08288000027338664
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,7,0.04610666632652283
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,7,0.03749333322048187
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,7,0.03547733277082443
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,7,0.035546667873859406
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,15,0.08285333216190338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,15,0.05958933134873708
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,15,0.09092799822489421
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,15,0.05421866476535797
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,7,0.035749333600203194
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,15,0.04800533254941305
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,15,0.04837333162625631
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,15,0.04799999793370565
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,15,0.04757333298524221
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,15,0.08099733293056488
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,15,0.08272000153859456
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,15,0.04696000119050344
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,15,0.03749866783618927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,15,0.03575466573238373
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,15,0.1398293375968933
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,15,0.03537066777547201
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,31,0.08285866677761078
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,31,0.059008002281188965
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,31,0.09035733342170715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,31,0.05412800113360087
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,31,0.047983999053637184
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,31,0.048250665267308555
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,31,0.04808533191680908
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,31,0.047872001926104225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,31,0.14019200205802917
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,15,0.03568533311287562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,31,0.08268266419569652
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,31,0.08285333216190338
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,31,0.04586133360862732
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,31,0.03570133447647095
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,31,0.03494933247566223
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,31,0.03581333408753077
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,63,0.0827893316745758
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,63,0.05969599882761637
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,63,0.09057066837946574
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,31,0.0377813329299291
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,63,0.05420800050099691
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,63,0.04794133206208547
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,63,0.04829333225886027
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,63,0.04814399778842926
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,63,0.047695999344189964
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,63,0.1381333371003469
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,63,0.08250133196512859
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,63,0.08297066887219746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,63,0.04569066564242045
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,63,0.03739733248949051
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,63,0.03591466695070267
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,63,0.03530666728814443
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,63,0.03565866748491923
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,127,0.08804266651471455
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,127,0.06902400155862172
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,127,0.09213333328564961
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,127,0.06048533320426941
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,127,0.05593599875768026
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,127,0.056613331039746605
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,127,0.05459733307361603
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,127,0.05398400127887726
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,127,0.13963733116785684
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,127,0.08293333152929942
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,127,0.08494933446248372
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,127,0.04946133494377136
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,127,0.03762666632731756
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,127,0.035946667194366455
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,127,0.03572266548871994
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,127,0.03510399907827377
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,255,0.11071999867757161
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,255,0.09434666236241658
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,255,0.0990773340066274
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,255,0.06950933237870534
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,255,0.06876266499360402
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,255,0.06850666801134746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,255,0.06846400101979573
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,255,0.06704000135262807
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,255,0.1487626632054647
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,255,0.08418132861455281
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,255,0.05226133267084757
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,255,0.04398933549722036
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,255,0.04232533276081085
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,255,0.09753066301345825
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,255,0.041493333876132965
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,255,0.04012800008058548
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,511,0.16165866454442343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,511,0.14597333470980325
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,511,0.16745599110921225
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,511,0.1255466639995575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,511,0.12201600273450215
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,511,0.11852266391118367
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,511,0.11911466717720032
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,511,0.11942933003107707
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,511,0.17151999473571777
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,511,0.11574400464693706
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,511,0.11146666606267293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,511,0.0742986649274826
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,511,0.06541866560777028
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,511,0.06277333199977875
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,511,0.062080000837643944
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,1023,0.2606079975763957
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,1023,0.24572267134984335
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,1023,0.30873600641886395
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,511,0.06090133388837179
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,1023,0.2145599921544393
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,1023,0.21114667256673178
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,1023,0.20967467625935873
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,1023,0.21015999714533487
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.22078933318456015
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.164000004529953
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.17504000663757324
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,1023,0.21753066778182983
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.1081813375155131
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.10492799679438274
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.10309333602587382
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,1023,0.10288533568382263
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,2047,0.45872000853220624
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,2047,0.4535733461380005
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,2047,0.5827146768569946
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,2047,0.4216373364130656
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,2047,0.41309332847595215
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,2047,0.4113706747690837
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.11725333333015442
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,2047,0.4090133508046468
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,2047,0.409168004989624
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.3189866741498311
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.25753066937128705
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.3030400077501933
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.20384534200032553
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.19262933731079102
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.18920000394185385
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.18790932496388754
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,2047,0.18724799156188965
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8347946802775065
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8550933202107748
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,4095,1.1099786758422852
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8072213331858317
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8005867004394531
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,4095,0.7989813486735026
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,4095,0.7992640336354574
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,4095,0.7942879994710287
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.5156000057856241
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.4349973201751709
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,4095,0.5488160053888956
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.3718133370081584
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.3614293336868286
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.3558880090713501
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,4095,0.3540053367614746
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.3564586639404297
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,8191,1.58681058883667
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,8191,1.6574559211730957
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,8191,2.1781867345174155
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,8191,1.5861066182454426
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,8191,1.565776030222575
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,8191,1.577797253926595
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,8191,1.5461494127909343
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,8191,1.567914644877116
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,8191,0.8840693632761637
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,8191,0.7885333697001139
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,8191,1.0244320233662922
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,8191,0.7039519945780436
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,8191,0.6928213437398275
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,8191,0.6891413529713949
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,8191,0.6860427061716715
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,8191,0.6852160294850668
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,128,1024,1,1,16383,3.086634635925293
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,64,1024,1,2,16383,3.2178773880004883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,32,1024,1,4,16383,4.541119893391927
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,16,1024,1,8,16383,3.1322666803995767
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,8,1024,1,16,16383,3.1202774047851562
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,4,1024,1,32,16383,3.0846986770629883
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,2,1024,1,64,16383,3.0785814921061196
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,float16,1,1024,1,128,16383,3.1067466735839844
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,128,1024,1,1,16383,1.6245439847310383
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,64,1024,1,2,16383,1.4915893872578938
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,16,1024,1,8,16383,1.3675999641418457
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,32,1024,1,4,16383,1.990063985188802
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,8,1024,1,16,16383,1.3565972646077473
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,4,1024,1,32,16383,1.3527305920918782
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,2,1024,1,64,16383,1.3504586219787598
TRTLLM,1.2.0rc6,NVIDIA GB200,mla_generation,default,float16,fp8,1,1024,1,128,16383,1.3478026390075684
