framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,1,1,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,1,2,0,0.015568000574906668
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,1,4,0,0.014837333311637243
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,1,8,0,0.014906667172908783
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,1,16,0,0.014912000546852747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,1,32,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,1,64,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,1,128,0,0.015247999380032221
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1,1,0,0.019205333044131596
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1,2,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1,4,0,0.01923199991385142
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1,8,0,0.017360000560681026
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1,16,0,0.0170666662355264
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1,32,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1,64,0,0.01894933357834816
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1,128,0,0.01738133281469345
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,16,2,0,0.01643199970324834
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,16,1,0,0.016501333564519882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,16,4,0,0.01509333277742068
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,16,8,0,0.015130666395028433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,16,16,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,16,32,0,0.014949332922697067
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,16,64,0,0.015173333386580149
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,16,128,0,0.015018666783968607
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16,2,0,0.018922666708628338
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16,1,0,0.01884799947341283
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16,4,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16,8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16,16,0,0.018911999960740406
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16,32,0,0.018757333358128864
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16,64,0,0.01899733394384384
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16,128,0,0.018901333212852478
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,32,1,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,32,2,0,0.01525866612792015
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,32,4,0,0.015168000012636185
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,32,8,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,32,16,0,0.015200000256299973
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,32,32,0,0.015189333508412043
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,32,128,0,0.015135999768972397
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,32,64,0,0.014901333798964819
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,32,1,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,32,2,0,0.02176533391078313
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,32,8,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,32,16,0,0.019567999988794327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,32,32,0,0.019039999693632126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,32,64,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,32,4,0,0.019685332973798115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,32,128,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,64,1,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,64,4,0,0.017103999853134155
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,64,8,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,64,16,0,0.017616000026464462
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,64,32,0,0.017530667285124462
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,64,64,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,64,2,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,64,128,0,0.017498667041460674
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,64,1,0,0.020261333634455998
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,64,2,0,0.019306667149066925
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,64,4,0,0.01971199984351794
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,64,8,0,0.019487999379634857
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,64,16,0,0.020362666497627895
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,64,32,0,0.019808000574509304
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,64,64,0,0.019152000546455383
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,64,128,0,0.01924266666173935
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,128,1,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,128,4,0,0.01714133347074191
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,128,2,0,0.017263999829689663
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,128,8,0,0.017082666357358296
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,128,16,0,0.01674666628241539
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,128,32,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,128,64,0,0.01714666684468587
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,128,128,0,0.017071999609470367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,128,1,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,128,2,0,0.023402666052182514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,128,4,0,0.021589333812395733
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,128,8,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,128,16,0,0.02164799968401591
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,128,64,0,0.01945066700379054
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,128,32,0,0.019029332945744198
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,128,128,0,0.019845332950353622
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,256,4,0,0.01932266727089882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,256,2,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,256,1,0,0.02142400046189626
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,256,8,0,0.019167999426523846
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,256,16,0,0.018933333456516266
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,256,32,0,0.019359999646743137
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,256,64,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,256,128,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,256,1,0,0.02980799973011017
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,256,4,0,0.0236160010099411
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,256,8,0,0.023631999890009563
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,256,16,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,256,32,0,0.02380799998839696
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,256,64,0,0.02309866746266683
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,256,128,0,0.022367998957633972
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,512,1,0,0.038634667793909706
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,512,2,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,256,2,0,0.02536533276240031
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,512,4,0,0.023354666928450268
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,512,8,0,0.02364266663789749
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,512,16,0,0.02313599983851115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,512,32,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,512,128,0,0.021727999051411945
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,512,1,0,0.05890133480230967
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,512,64,0,0.021722666919231415
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,512,2,0,0.03161066770553589
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,512,4,0,0.029520000020662945
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,512,8,0,0.02736533433198929
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,512,16,0,0.025653332471847534
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,512,32,0,0.027509334186712902
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,512,64,0,0.0277813325325648
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,512,128,0,0.02573866645495097
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,1024,1,0,0.0836853285630544
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,1024,2,0,0.051685333251953125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,1024,4,0,0.03197333216667175
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,1024,8,0,0.029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,1024,16,0,0.029669334491093952
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,1024,32,0,0.02945599953333537
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,1024,64,0,0.02977066735426585
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,1024,128,0,0.02976000060637792
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1024,2,0,0.0681386689345042
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1024,1,0,0.11471999684969585
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1024,4,0,0.037733333806196846
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1024,8,0,0.03402666747570038
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1024,16,0,0.03364799916744232
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1024,32,0,0.033376000821590424
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1024,64,0,0.031871999303499855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1024,128,0,0.03201066702604294
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,1536,1,0,0.14069333672523499
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,1536,2,0,0.08555733164151509
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,1536,4,0,0.0525546669960022
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,1536,8,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,1536,16,0,0.03770666569471359
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,1536,32,0,0.036917333801587425
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,1536,64,0,0.03585600107908249
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,1536,128,0,0.035504000882307686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,1536,1,0,0.17805866400400797
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,1536,2,0,0.10390399893124898
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,1536,4,0,0.06125866870085398
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,1536,8,0,0.04193066557248434
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,1536,16,0,0.04012800008058548
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,1536,32,0,0.03752533346414566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,1536,64,0,0.03801066676775614
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,1536,128,0,0.03839466720819473
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,2048,1,0,0.2074399987856547
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,2048,2,0,0.12243200341860454
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,2048,8,0,0.04493333399295807
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,2048,4,0,0.07736533383528392
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,2048,16,0,0.043663998444875084
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,2048,32,0,0.04331199824810028
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,2048,64,0,0.041706666350364685
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,2048,128,0,0.042223999897638954
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,2048,2,0,0.14283200105031332
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,2048,1,0,0.24804266293843588
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,2048,8,0,0.04818133513132731
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,2048,4,0,0.08593066533406575
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,2048,16,0,0.04558933277924856
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,2048,32,0,0.0440586656332016
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,2048,64,0,0.04248533149560293
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,2048,128,0,0.04211199780305227
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,3072,2,0,0.2137226661046346
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,3072,1,0,0.38149865468343097
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,3072,8,0,0.08062399923801422
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,3072,4,0,0.129530668258667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,3072,16,0,0.05682133138179779
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,3072,32,0,0.05641066531340281
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,3072,64,0,0.05564799904823303
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,3072,128,0,0.05423999826113383
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,3072,2,0,0.23382933934529623
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,3072,1,0,0.4139626820882161
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,3072,4,0,0.13753599921862283
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,3072,8,0,0.08319999774297078
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,3072,16,0,0.05827199916044871
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,3072,32,0,0.05609600245952606
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,3072,64,0,0.0540533314148585
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,3072,128,0,0.053770666321118675
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,4096,2,0,0.3307146628697713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,4096,8,0,0.12507200241088867
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,4096,4,0,0.19416000445683798
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,4096,1,0,0.6004960139592489
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,4096,16,0,0.07253866891066234
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,4096,32,0,0.06923200190067291
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,4096,64,0,0.06935999790827434
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,4096,128,0,0.06671466430028279
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,4096,1,0,0.6250880161921183
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,4096,4,0,0.2015893260637919
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,4096,8,0,0.12054399649302165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,4096,2,0,0.3473120133082072
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,4096,16,0,0.07262399792671204
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,4096,32,0,0.06878399848937988
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,4096,64,0,0.06656000018119812
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,4096,128,0,0.0652159998814265
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,6144,2,0,0.640122652053833
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,6144,8,0,0.21735999981562296
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,6144,4,0,0.35685332616170246
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,6144,1,0,1.3346506754557292
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,6144,16,0,0.13134400049845377
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,6144,32,0,0.0977226694424947
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,6144,64,0,0.09473599990208943
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,6144,128,0,0.1058186690012614
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,6144,4,0,0.3415786822636922
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,6144,2,0,0.6073919932047526
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,6144,1,0,1.1355306307474773
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,6144,8,0,0.20548266172409058
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,6144,16,0,0.12612799803415933
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,6144,32,0,0.0904960036277771
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,6144,64,0,0.08678399523099263
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,6144,128,0,0.08682666222254436
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,8192,4,0,0.5733493169148763
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,8192,2,0,1.021183967590332
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,8192,1,0,1.9179466565450032
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,8192,8,0,0.33262399832407635
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,8192,16,0,0.2103466590245565
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,8192,32,0,0.1293226679166158
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,8192,64,0,0.12126933534940083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,8192,128,0,0.12019733587900798
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,8192,4,0,0.5198506514231364
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,8192,2,0,0.9453653494517008
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,8192,1,0,1.7839946746826172
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,8192,8,0,0.305567999680837
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,8192,16,0,0.19318399826685587
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,8192,32,0,0.1172160009543101
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,8192,64,0,0.11125333110491435
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,8192,128,0,0.10931199789047241
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,10240,4,0,0.8277920087178549
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,10240,2,0,1.9403732617696126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,10240,8,0,0.4691306749979655
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,10240,1,0,4.314592043558757
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,10240,16,0,0.28996266921361286
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,10240,32,0,0.1705440084139506
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,10240,64,0,0.14695466558138529
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,10240,128,0,0.2364906668663025
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,10240,4,0,0.7369120121002197
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,10240,8,0,0.4240266482035319
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,10240,2,0,1.3617439270019531
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,10240,16,0,0.2653759916623433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,10240,32,0,0.15691199898719788
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,10240,1,0,2.8674774169921875
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,10240,64,0,0.13267200191815695
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,10240,128,0,0.13085333506266275
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,12288,8,0,0.633621335029602
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,12288,4,0,1.1036427021026611
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,12288,16,0,0.3834559917449951
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,12288,32,0,0.2898026704788208
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,12288,2,0,3.309306780497233
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,12288,64,0,0.1781760056813558
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,12288,128,0,0.17147733767827353
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,12288,1,0,6.73739751180013
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,12288,4,0,0.971839984258016
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,12288,2,0,1.8320693969726562
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,12288,8,0,0.555237332979838
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,12288,16,0,0.3340746561686198
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,12288,32,0,0.21422932545344034
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,12288,1,0,4.582607905069987
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,12288,64,0,0.15793599685033163
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,12288,128,0,0.15265599886576334
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,16384,8,0,1.013973315556844
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,16384,4,0,2.035546620686849
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,16384,16,0,0.5882026751836141
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,16384,64,0,0.25301865736643475
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,16384,32,0,0.5655573209126791
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,16384,2,0,5.900255839029948
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,16384,128,0,0.22506133715311685
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,16384,4,0,1.5633759498596191
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,16384,2,0,3.8493385314941406
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,16384,8,0,0.9205226898193359
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,16384,1,0,12.275450388590494
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,16384,32,0,0.3256959915161133
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,16384,16,0,0.5111466646194458
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,16384,64,0,0.21167999505996704
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,16384,128,0,0.19746132691701254
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,16384,1,0,7.828367869059245
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,1,32768,8,0,5.6947784423828125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,1,32768,16,0,2.236293315887451
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,1,32768,4,0,10.634186426798502
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,1,32768,32,0,1.0902132987976074
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,1,32768,64,0,0.6873493194580078
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,1,32768,128,0,0.5049440066019694
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,1,32768,2,0,24.497578938802082
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,1,32768,2,0,13.999786376953125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,1,32768,4,0,7.756981531778972
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,1,32768,8,0,3.253909428914388
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,1,32768,16,0,1.5442879994710286
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,1,32768,32,0,0.8993386427561442
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,1,32768,64,0,0.5987093448638916
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,1,32768,128,0,0.38685333728790283
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,1,1,0,0.020549333343903225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,1,2,0,0.017509333789348602
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,1,4,0,0.017279999951521557
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,1,8,0,0.015008000036080679
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,1,16,0,0.015157333264748255
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,1,32,0,0.014975999792416891
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,1,64,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,1,128,0,0.01504533365368843
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1,1,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1,2,0,0.019189332922299702
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1,4,0,0.017429333180189133
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1,8,0,0.01998399943113327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1,16,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,1,32768,1,0,31.39112599690755
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1,32,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1,64,0,0.021151999632517498
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1,128,0,0.017055999487638474
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,16,1,0,0.01922133316596349
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,16,2,0,0.014970666418472925
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,16,4,0,0.015087999403476715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,16,8,0,0.01488000030318896
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,16,16,0,0.01691199963291486
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,16,32,0,0.01571200042963028
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,16,64,0,0.016197333733240765
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,16,128,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16,1,0,0.02366400013367335
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,1,32768,1,0,50.037353515625
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16,2,0,0.020090666910012562
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16,4,0,0.019685332973798115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16,16,0,0.019445333629846573
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16,32,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16,8,0,0.019754666835069656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16,64,0,0.017808000246683758
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16,128,0,0.01886933296918869
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,32,1,0,0.01978133370478948
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,32,2,0,0.01523200049996376
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,32,4,0,0.015599999576807022
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,32,8,0,0.01616000011563301
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,32,16,0,0.015717333803574245
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,32,32,0,0.017397332936525345
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,32,128,0,0.015344000111023584
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,32,64,0,0.015098666151364645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,32,1,0,0.023792001108328503
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,32,2,0,0.019754666835069656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,32,4,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,32,8,0,0.021690666675567627
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,32,16,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,32,32,0,0.019178666174411774
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,32,64,0,0.020026666422684986
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,32,128,0,0.019199999670187633
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,64,1,0,0.02161066730817159
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,64,2,0,0.01756799966096878
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,64,4,0,0.01735466718673706
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,64,8,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,64,16,0,0.01777600000301997
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,64,32,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,64,64,0,0.01623999948302905
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,64,128,0,0.01729600007335345
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,64,1,0,0.026608000199000042
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,64,2,0,0.022096000611782074
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,64,4,0,0.021157334248224895
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,64,8,0,0.020981334149837494
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,64,16,0,0.020400000115235645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,64,32,0,0.021594665944576263
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,64,64,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,64,128,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,128,1,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,128,2,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,128,4,0,0.017344000438849132
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,128,8,0,0.01695999999841054
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,128,16,0,0.01621333385507266
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,128,32,0,0.0174346665541331
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,128,128,0,0.01716800034046173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,128,64,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,128,2,0,0.02367466688156128
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,128,1,0,0.0316746657093366
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,128,4,0,0.023221333821614582
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,128,8,0,0.022154666483402252
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,128,16,0,0.021488000949223835
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,128,32,0,0.021856000026067097
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,128,64,0,0.019930666933457058
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,128,128,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,256,2,0,0.023728000621000927
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,256,1,0,0.0332640012105306
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,256,8,0,0.021231998999913532
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,256,4,0,0.021514666577180225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,256,16,0,0.020581333587567013
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,256,32,0,0.019797333826621372
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,256,64,0,0.01916266605257988
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,256,128,0,0.01964266722400983
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,256,1,0,0.052469333012898765
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,256,2,0,0.02976000060637792
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,256,4,0,0.02548266698916753
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,256,8,0,0.02565866708755493
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,256,16,0,0.02329600105683009
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,256,64,0,0.023530667026837666
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,256,32,0,0.024175999065240223
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,256,128,0,0.02327999969323476
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,512,1,0,0.06644799808661143
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,512,2,0,0.03997333347797394
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,512,4,0,0.025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,512,8,0,0.025381334125995636
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,512,16,0,0.02388266722361247
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,512,32,0,0.0252960001428922
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,512,64,0,0.023951999843120575
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,512,128,0,0.06704533100128174
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,512,1,0,0.09989333152770996
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,512,2,0,0.05820266902446747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,512,4,0,0.03331200033426285
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,512,8,0,0.027450665831565857
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,512,16,0,0.027349332968393963
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,512,32,0,0.02606933315594991
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,512,64,0,0.02756800005833308
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,512,128,0,0.02555199960867564
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,1024,1,0,0.14458133776982626
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,1024,2,0,0.08546666304270427
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,1024,4,0,0.05314666529496511
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,1024,8,0,0.03201066702604294
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,1024,16,0,0.03158933420976003
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,1024,32,0,0.03159466634194056
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,1024,64,0,0.030133334298928578
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,1024,128,0,0.030042665700117748
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1024,1,0,0.20258132616678873
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1024,2,0,0.11652800440788269
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1024,4,0,0.06772266825040181
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1024,8,0,0.03813866774241129
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1024,16,0,0.036133334040641785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1024,32,0,0.034448000291983284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1024,64,0,0.03164800008138021
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1024,128,0,0.03175999969244003
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,1536,1,0,0.253546675046285
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,1536,2,0,0.14141333103179932
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,1536,4,0,0.08556800087292989
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,1536,16,0,0.0378560001651446
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,1536,8,0,0.052501335740089417
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,1536,32,0,0.03595199932654699
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,1536,64,0,0.03578133384386698
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,1536,128,0,0.036720000207424164
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,1536,1,0,0.3203093409538269
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,1536,2,0,0.17894933621088663
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,1536,4,0,0.10609599947929382
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,1536,8,0,0.06268799801667531
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,1536,16,0,0.04168533285458883
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,1536,32,0,0.04014399896065394
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,1536,64,0,0.03841600070397059
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,1536,128,0,0.03823466598987579
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,2048,1,0,0.38517868518829346
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,2048,2,0,0.21126933892567953
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,2048,4,0,0.12298132975896199
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,2048,8,0,0.07745600243409474
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,2048,16,0,0.045935998360315956
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,2048,32,0,0.043712000052134194
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,2048,64,0,0.04402133325735728
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,2048,128,0,0.043824002146720886
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,2048,1,0,0.4603360096613566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,2048,2,0,0.25063467025756836
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,2048,4,0,0.14495467146237692
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,2048,8,0,0.08735466996828715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,2048,16,0,0.04997866849104563
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,2048,64,0,0.044165333112080894
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,2048,32,0,0.054773335655530296
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,2048,128,0,0.0444213350613912
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,3072,4,0,0.2171199917793274
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,3072,1,0,0.724064032236735
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,3072,8,0,0.1283253331979116
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,3072,2,0,0.3856053352355957
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,3072,16,0,0.07814399898052216
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,3072,32,0,0.05866133173306783
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,3072,128,0,0.0563679983218511
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,3072,64,0,0.056559999783833824
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,3072,2,0,0.41925867398579914
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,3072,8,0,0.1388320028781891
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,3072,4,0,0.2360853354136149
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,3072,1,0,0.7876213391621908
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,3072,16,0,0.08442133665084839
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,3072,64,0,0.056330665946006775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,3072,32,0,0.058517331878344216
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,3072,128,0,0.054805333415667214
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,4096,8,0,0.19760000705718994
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,4096,4,0,0.3351253271102905
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,4096,1,0,1.1321439743041992
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,4096,2,0,0.6074399948120117
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,4096,16,0,0.12756799658139548
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,4096,32,0,0.07434666653474171
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,4096,64,0,0.07061333457628886
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,4096,128,0,0.07173333565394084
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,4096,2,0,0.6318933169047037
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,4096,8,0,0.20197866360346475
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,4096,4,0,0.34654398759206134
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,4096,1,0,1.203775962193807
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,4096,32,0,0.07380799949169159
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,4096,16,0,0.12360533078511556
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,4096,64,0,0.06823466718196869
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,4096,128,0,0.07056533296902974
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,6144,8,0,0.36419200897216797
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,6144,4,0,0.6361600160598755
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,6144,2,0,1.1855040391286213
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,6144,16,0,0.2182719906171163
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,6144,32,0,0.13649066289265951
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,6144,64,0,0.1192586620648702
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,6144,1,0,3.0631628036499023
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,6144,128,0,0.0956213374932607
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,6144,8,0,0.34669331709543866
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,6144,4,0,0.6087679862976074
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,6144,2,0,1.1408053239186604
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,6144,16,0,0.2084853251775106
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,6144,1,0,2.3920532862345376
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,6144,32,0,0.13089600205421448
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,6144,64,0,0.09538666407267253
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,6144,128,0,0.08943466345469157
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,8192,8,0,0.5732053518295288
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,8192,4,0,1.031824032465617
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,8192,2,0,2.9454612731933594
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,8192,16,0,0.33640531698862713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,8192,32,0,0.21493866046269736
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,8192,64,0,0.1383999983469645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,8192,128,0,0.12772267063458762
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,8192,1,0,6.311920166015625
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,8192,2,0,1.7930399576822917
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,8192,4,0,0.9411733150482178
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,8192,8,0,0.5259253184000651
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,8192,16,0,0.30828799804051715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,8192,1,0,4.726335843404134
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,8192,32,0,0.19817600647608438
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,8192,64,0,0.12238400181134541
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,8192,128,0,0.11468799908955891
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,10240,4,0,1.9932533899943035
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,10240,8,0,0.8293653329213461
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,10240,16,0,0.5687893231709799
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,10240,2,0,4.729311943054199
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,10240,64,0,0.1800266702969869
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,10240,32,0,0.2970186670621236
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,10240,128,0,0.1885653336842855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,10240,1,0,9.365866978963217
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,10240,4,0,1.3464800516764324
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,10240,2,0,3.091007868448893
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,10240,16,0,0.42234134674072266
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,10240,8,0,0.7352533340454102
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,10240,64,0,0.1639893352985382
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,10240,32,0,0.33399466673533124
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,10240,128,0,0.13700266679128012
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,10240,1,0,6.7719573974609375
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,12288,4,0,2.069647947947184
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,12288,8,0,1.1393333276112874
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,12288,32,0,0.3821920156478882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,12288,16,0,0.6395413478215536
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,12288,64,0,0.24160534143447876
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,12288,128,0,0.20041600863138834
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,12288,2,0,7.085205078125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,12288,4,0,1.840677261352539
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,12288,2,0,4.386309305826823
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,12288,8,0,0.98361603418986
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,12288,16,0,0.5589280128479004
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,12288,1,0,13.560479482014975
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,12288,32,0,0.34117865562438965
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,12288,64,0,0.21844265858332315
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,12288,128,0,0.16293332974116007
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,12288,1,0,9.607397079467773
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,16384,8,0,1.899328072865804
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,16384,4,0,6.097429275512695
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,16384,16,0,1.0125866731007893
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,16384,64,0,0.37809598445892334
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,16384,32,0,0.5943093299865723
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,16384,128,0,0.26977066198984784
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,16384,2,0,11.535663604736328
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,16384,4,0,3.579312006632487
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,16384,8,0,1.5676906903584797
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,16384,2,0,8.223349253336588
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,16384,16,0,0.9025920232137045
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,16384,32,0,0.5533813238143921
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,16384,64,0,0.331386665503184
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,16384,128,0,0.21645333369572958
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,16384,1,0,27.284678141276043
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,16384,1,0,16.082992553710938
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,2,32768,8,0,12.379567464192709
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,2,32768,16,0,6.356026967366536
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,2,32768,32,0,2.04909340540568
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,2,32768,64,0,1.1036907037099202
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,2,32768,4,0,24.81822967529297
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,2,32768,128,0,0.6957333087921143
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,2,32768,2,0,50.28692626953125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,2,32768,2,0,32.47314707438151
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,2,32768,4,0,15.954352060953775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,2,32768,8,0,7.837439854939778
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,2,32768,32,0,1.5445013046264648
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,2,32768,16,0,3.4919894536336265
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,2,32768,64,0,0.9123679796854655
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,2,32768,128,0,0.582757314046224
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,1,1,0,0.02587733417749405
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,1,2,0,0.021477334201335907
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,1,4,0,0.01773333301146825
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,1,8,0,0.01569066693385442
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,1,16,0,0.015728000551462173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,1,32,0,0.01651200031240781
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,1,64,0,0.01562133307258288
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,1,128,0,0.015583999454975128
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1,1,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1,2,0,0.023232000569502514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1,4,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1,8,0,0.01883200059334437
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1,16,0,0.01711999997496605
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1,32,0,0.017375999440749485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1,64,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1,128,0,0.01720533271630605
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,16,1,0,0.02515200028816859
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,16,2,0,0.019317333896954853
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,16,4,0,0.01709866647919019
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,16,8,0,0.01676799977819125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,16,16,0,0.015450666348139444
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,16,32,0,0.015578666081031164
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,16,64,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,16,128,0,0.015311999867359797
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,16,1,0,0.028901333610216778
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,16,2,0,0.023029332359631855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,16,4,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,16,8,0,0.019482667247454327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,16,16,0,0.01939733326435089
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,16,32,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,16,64,0,0.019215999792019527
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,16,128,0,0.019061333189407986
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,32,1,0,0.025285333395004272
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,32,2,0,0.019109333554903667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,32,4,0,0.015034666905800501
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,32,8,0,0.015205333630243937
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,2,32768,1,0,63.746612548828125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,2,32768,1,0,94.35012817382812
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,32,16,0,0.015658666690190632
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,32,32,0,0.017008000363906223
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,32,128,0,0.015562667200962702
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,32,64,0,0.015578666081031164
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,32,2,0,0.02535466601451238
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,32,1,0,0.03143466760714849
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,32,4,0,0.02120000123977661
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,32,8,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,32,16,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,32,32,0,0.019466667125622433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,32,64,0,0.019093333433071773
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,32,128,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,64,1,0,0.029535998900731403
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,64,2,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,64,4,0,0.01721599946419398
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,64,8,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,64,16,0,0.014959999670584997
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,64,32,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,64,64,0,0.0163680004576842
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,64,128,0,0.01534933348496755
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,64,1,0,0.03757333258787791
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,64,2,0,0.027749332288901012
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,64,4,0,0.021418665846188862
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,64,8,0,0.021301334102948506
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,64,16,0,0.02197866638501485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,64,32,0,0.021712000171343487
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,64,64,0,0.02107733239730199
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,64,128,0,0.019567999988794327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,128,1,0,0.03385599950949351
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,128,4,0,0.018960000326236088
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,128,2,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,128,8,0,0.017573333034912746
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,128,16,0,0.017136000096797943
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,128,32,0,0.017338667064905167
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,128,64,0,0.017456000049908955
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,128,128,0,0.017514667163292568
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,128,1,0,0.056176001826922096
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,128,2,0,0.03219199925661087
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,128,4,0,0.023605334262053173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,128,8,0,0.026127999027570088
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,128,16,0,0.0215786670645078
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,128,32,0,0.021338666478792827
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,128,64,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,128,128,0,0.021674667795499165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,256,1,0,0.058559998869895935
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,256,2,0,0.03217600037654241
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,256,4,0,0.021733333667119343
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,256,8,0,0.02239466706911723
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,256,16,0,0.019519999623298645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,256,32,0,0.021802666286627453
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,256,64,0,0.019717333217461903
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,256,128,0,0.019760000209013622
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,256,1,0,0.09413333733876546
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,256,2,0,0.05463466544946035
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,256,4,0,0.029680001238981884
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,256,8,0,0.025583999852339428
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,256,16,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,256,32,0,0.023408000667889912
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,256,64,0,0.025589334468046825
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,256,128,0,0.023669332265853882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,512,1,0,0.11306132872899373
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,512,2,0,0.066021333138148
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,512,4,0,0.0400693342089653
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,512,16,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,512,8,0,0.026122666895389557
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,512,32,0,0.023914667467276256
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,512,64,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,512,128,0,0.02342933416366577
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,512,1,0,0.17707733313242593
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,512,2,0,0.10094933708508809
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,512,4,0,0.0587360014518102
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,512,16,0,0.02958400050799052
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,512,8,0,0.03190933416287104
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,512,32,0,0.02812800059715907
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,512,64,0,0.026122666895389557
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,512,128,0,0.02573866645495097
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,1024,1,0,0.26630399624506634
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,1024,2,0,0.14724266529083252
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,1024,4,0,0.08707732955614726
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,1024,8,0,0.054933334390322365
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,1024,32,0,0.03145066648721695
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,1024,16,0,0.031685332457224526
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,1024,64,0,0.0315786674618721
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,1024,128,0,0.03139200061559677
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1024,4,0,0.11548800269762675
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1024,2,0,0.2025173306465149
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1024,1,0,0.3699359893798828
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1024,8,0,0.06948799888292949
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1024,16,0,0.03774933268626531
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1024,32,0,0.033615998923778534
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1024,64,0,0.03340800106525421
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1024,128,0,0.03172266731659571
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,1536,2,0,0.25339200099309284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,1536,1,0,0.4665919939676921
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,1536,4,0,0.14150399963061014
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,1536,8,0,0.08634666601816814
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,1536,16,0,0.05217066903909048
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,1536,32,0,0.03946666667858759
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,1536,64,0,0.03779733429352442
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,1536,128,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,1536,1,0,0.6037066777547201
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,1536,2,0,0.32046933968861896
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,1536,4,0,0.1785866618156433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,1536,8,0,0.10503466924031575
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,1536,16,0,0.06357866525650024
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,1536,32,0,0.042122667034467064
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,1536,64,0,0.04027199993530909
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,1536,128,0,0.03969600051641464
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,2048,1,0,0.7203946908315023
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,2048,2,0,0.38125868638356525
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,2048,4,0,0.212442676226298
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,2048,8,0,0.12433066964149475
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,2048,16,0,0.08062399923801422
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,2048,32,0,0.04796266555786133
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,2048,64,0,0.04362666606903076
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,2048,128,0,0.0440533310174942
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,2048,1,0,0.8807040055592855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,2048,4,0,0.25065066417058307
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,2048,2,0,0.46063466866811115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,2048,16,0,0.0881066620349884
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,2048,8,0,0.14494933684666952
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,2048,32,0,0.049957334995269775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,2048,64,0,0.04593066871166229
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,2048,128,0,0.04458666841189066
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,3072,4,0,0.39261333147684735
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,3072,8,0,0.21857066949208578
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,3072,1,0,1.487493356068929
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,3072,2,0,0.7099093596140543
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,3072,16,0,0.1320373316605886
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,3072,64,0,0.06195733447869619
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,3072,32,0,0.08447999755541484
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,3072,128,0,0.05796800057093302
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,3072,8,0,0.23492266734441122
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,3072,4,0,0.42218132813771564
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,3072,2,0,0.7921120325724283
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,3072,1,0,1.5339093208312988
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,3072,16,0,0.1409226655960083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,3072,32,0,0.08895466725031535
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,3072,64,0,0.06046933432420095
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,3072,128,0,0.058389330903689064
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,4096,4,0,0.6119360129038492
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,4096,8,0,0.33878934383392334
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,4096,2,0,1.143232027689616
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,4096,16,0,0.19925334056218466
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,4096,32,0,0.12837866942087808
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,4096,1,0,3.3390347162882485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,4096,64,0,0.11094933748245239
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,4096,128,0,0.07276266813278198
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,4096,4,0,0.6361973285675049
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,4096,8,0,0.34675200780232746
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,4096,2,0,1.2108533382415771
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,4096,32,0,0.12622933586438498
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,4096,16,0,0.20247999827067056
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,4096,1,0,2.4133706092834473
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,4096,64,0,0.0769706666469574
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,4096,128,0,0.07197866837183635
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,6144,8,0,0.6452800035476685
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,6144,4,0,1.1898399988810222
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,6144,2,0,2.46888001759847
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,6144,16,0,0.36107198397318524
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,6144,32,0,0.2212053338686625
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,6144,64,0,0.1423786679903666
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,6144,128,0,0.11149866382280986
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,6144,1,0,7.235567728678386
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,6144,4,0,1.1445386409759521
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,6144,2,0,2.244570732116699
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,6144,16,0,0.3481493393580119
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,6144,8,0,0.611786683400472
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,6144,32,0,0.21076800425847372
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,6144,64,0,0.13699199755986533
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,6144,1,0,5.730672200520833
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,6144,128,0,0.09929600358009338
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,8192,8,0,1.0369706948598225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,8192,4,0,1.9597172737121582
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,8192,16,0,0.7144266764322916
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,8192,32,0,0.33772265911102295
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,8192,64,0,0.22187199195226034
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,8192,2,0,6.521349589029948
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,8192,128,0,0.15252799789110819
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,8192,4,0,2.0875306129455566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,8192,8,0,0.9466613133748373
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,8192,2,0,4.441743850708008
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,8192,32,0,0.313482662041982
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,8192,1,0,13.235300699869791
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,8192,16,0,0.5296586751937866
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,8192,64,0,0.20105600357055664
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,8192,128,0,0.13081600268681845
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,8192,1,0,8.644437154134115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,10240,4,0,4.746335983276367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,10240,8,0,2.2747573852539062
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,10240,16,0,1.0357920328776042
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,10240,2,0,9.6922238667806
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,10240,64,0,0.3935786485671997
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,10240,32,0,0.6380159854888916
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,10240,128,0,0.1912320057551066
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,10240,4,0,2.906165440877279
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,10240,1,0,20.06927998860677
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,10240,2,0,6.774709065755208
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,10240,32,0,0.4285920063654582
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,10240,16,0,0.7421493530273438
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,10240,8,0,1.3539199829101562
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,10240,64,0,0.2737226684888204
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,10240,128,0,0.17153600851694742
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,10240,1,0,14.079920450846354
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,12288,8,0,2.4093653361002603
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,12288,4,0,7.275360107421875
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,12288,16,0,1.1369280020395915
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,12288,32,0,0.6367253462473551
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,12288,64,0,0.4002399841944377
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,12288,128,0,0.2528853416442871
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,12288,2,0,13.989738464355469
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,12288,4,0,4.705973307291667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,12288,2,0,9.585642496744791
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,12288,8,0,2.025749365488688
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,12288,16,0,0.9845653374989828
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,12288,32,0,0.5646666685740153
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,12288,64,0,0.34698665142059326
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,12288,128,0,0.22749332586924234
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,12288,1,0,29.993621826171875
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,12288,1,0,19.892597198486328
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,4,16384,8,0,6.144949595133464
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,4,16384,4,0,11.895098368326822
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,4,16384,16,0,2.699690818786621
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,4,16384,32,0,1.0363413492838542
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,4,16384,64,0,0.6327466567357382
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,4,16384,128,0,0.42292265097300213
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,4,16384,2,0,25.633318583170574
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,4,16384,4,0,8.578549067179361
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,4,16384,2,0,15.49509302775065
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,4,16384,16,0,1.5851359367370605
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,4,16384,8,0,2.995925267537435
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,4,16384,64,0,0.5273653268814087
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,4,16384,32,0,0.8847200075785319
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,1,1,0,0.03382933388153712
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,4,16384,128,0,0.34230931599934894
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,1,2,0,0.02565866708755493
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,1,4,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,1,8,0,0.017551999539136887
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,1,16,0,0.017157333592573803
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,1,32,0,0.015317333241303762
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,1,64,0,0.015194666882356008
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,1,128,0,0.01526933287580808
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1,1,0,0.03568533311287562
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1,2,0,0.027274665733178455
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1,4,0,0.023290666441122692
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1,8,0,0.019050666441520054
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1,16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1,32,0,0.01911466692884763
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1,64,0,0.01897066707412402
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1,128,0,0.01858666663368543
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,16,1,0,0.03569599986076355
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,16,2,0,0.025077333052953083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,16,4,0,0.018954666952292126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,16,8,0,0.015279999623696009
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,16,16,0,0.015061333775520325
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,16,32,0,0.017018667111794155
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,16,64,0,0.015370666980743408
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,4,16384,1,0,51.777872721354164
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,4,16384,1,0,35.63610076904297
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,16,128,0,0.015253332753976187
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,16,1,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,16,16,0,0.02102400114138921
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,16,8,0,0.021397332350413006
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,16,2,0,0.029701332251230877
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,16,4,0,0.023946667710940044
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,16,32,0,0.019088000059127808
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,16,64,0,0.019578666736682255
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,16,128,0,0.019461333751678467
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,32,1,0,0.03790933390458425
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,32,4,0,0.019797333826621372
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,32,2,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,32,8,0,0.015109332899252573
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,32,16,0,0.016095999628305435
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,32,32,0,0.016250666230916977
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,32,64,0,0.015002666662136713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,32,128,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,32,1,0,0.045253331462542214
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,32,2,0,0.031504000226656594
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,32,4,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,32,8,0,0.019637333850065868
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,32,32,0,0.019925333559513092
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,32,64,0,0.019498666127522785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,32,16,0,0.021850667893886566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,32,128,0,0.019546666493018467
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,64,1,0,0.04409599800904592
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,64,2,0,0.03001066545645396
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,64,4,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,64,8,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,64,16,0,0.015487999965747198
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,64,32,0,0.01695466662446658
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,64,64,0,0.01570133368174235
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,64,128,0,0.01553600033124288
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,64,1,0,0.06435733536879222
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,64,2,0,0.03559466699759165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,64,4,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,64,8,0,0.02109333376089732
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,64,16,0,0.021365332106749218
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,64,32,0,0.021536000072956085
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,64,64,0,0.021104000508785248
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,64,128,0,0.02111999938885371
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,128,1,0,0.056890666484832764
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,128,2,0,0.03570133447647095
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,128,4,0,0.02317333221435547
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,128,8,0,0.019082666685183842
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,128,16,0,0.017029333859682083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,128,32,0,0.01706133286158244
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,128,128,0,0.016986666868130367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,128,64,0,0.016832000265518825
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,128,1,0,0.09684800108273824
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,128,2,0,0.05614933371543884
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,128,4,0,0.03234666585922241
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,128,8,0,0.024933333198229473
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,128,16,0,0.021407999098300934
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,128,32,0,0.021386665602525074
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,128,64,0,0.0215786670645078
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,128,128,0,0.021503999829292297
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,256,1,0,0.09995733698209126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,256,2,0,0.05853333572546641
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,256,4,0,0.03200533241033554
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,256,8,0,0.023472001155217487
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,256,16,0,0.021733333667119343
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,256,32,0,0.019600000232458115
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,256,64,0,0.019973333925008774
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,256,128,0,0.04508799811204275
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,256,1,0,0.16403200229008993
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,256,2,0,0.09544000029563904
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,256,4,0,0.05301866432030996
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,256,8,0,0.03257599969704946
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,256,16,0,0.027280000348885853
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,256,32,0,0.025445332129796345
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,256,64,0,0.025360000630219776
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,512,1,0,0.20838934183120728
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,256,128,0,0.025050667424996693
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,512,2,0,0.11506133278210957
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,512,4,0,0.06611200173695882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,512,8,0,0.04178133110205332
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,512,64,0,0.023344000180562336
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,512,32,0,0.025402667621771496
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,512,16,0,0.027471999327341717
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,512,128,0,0.024122667809327442
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,512,2,0,0.1768746574719747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,512,1,0,0.32525867223739624
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,512,4,0,0.10101333260536194
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,512,8,0,0.060496002435684204
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,512,16,0,0.03177600105603536
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,512,32,0,0.029232000311215717
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,512,128,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,512,64,0,0.027322667340437572
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,1024,2,0,0.2685333291689555
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,1024,8,0,0.08707200487454732
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,1024,4,0,0.14842133720715842
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,1024,1,0,0.5060693422953287
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,1024,32,0,0.03366933266321818
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,1024,16,0,0.054485330979029335
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,1024,64,0,0.032469332218170166
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,1024,128,0,0.031370667119820915
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1024,8,0,0.11629866560300191
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1024,2,0,0.37484268347422284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1024,4,0,0.20422399044036865
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1024,1,0,0.7129920323689779
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1024,16,0,0.07085866729418437
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1024,32,0,0.03976000100374222
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1024,64,0,0.035749333600203194
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1024,128,0,0.03396799912055334
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,1536,4,0,0.25675199429194134
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,1536,8,0,0.1434719959894816
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,1536,2,0,0.47235198815663654
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,1536,1,0,0.9056906700134277
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,1536,16,0,0.08917333682378133
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,1536,32,0,0.05808533231417338
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,1536,64,0,0.0415040006240209
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,1536,128,0,0.03963200002908707
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,1536,8,0,0.18175466855367026
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,1536,4,0,0.32413333654403687
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,1536,1,0,1.1825706958770752
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,1536,2,0,0.6089973449707031
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,1536,16,0,0.10834133625030518
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,1536,32,0,0.06831466654936473
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,1536,64,0,0.045968001087506614
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,1536,128,0,0.04204800228277842
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,2048,2,0,0.7237493197123209
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,2048,8,0,0.2145706613858541
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,2048,4,0,0.38708798090616864
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,2048,1,0,1.4382507006327312
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,2048,16,0,0.12666666507720947
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,2048,32,0,0.08307200173536937
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,2048,64,0,0.05227733155091604
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,2048,128,0,0.047983999053637184
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,2048,4,0,0.4652906656265259
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,2048,2,0,0.8814133008321127
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,2048,1,0,1.7253440221150715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,2048,8,0,0.25381332635879517
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,2048,16,0,0.14711466431617737
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,2048,32,0,0.09745066364606221
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,2048,64,0,0.055914665261904396
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,2048,128,0,0.04811733464399973
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,3072,4,0,0.7223947048187256
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,3072,2,0,1.368058681488037
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,3072,8,0,0.39131732781728107
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,3072,16,0,0.22501333554585776
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,3072,1,0,3.710810661315918
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,3072,32,0,0.1378933290640513
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,3072,64,0,0.09191999832789104
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,3072,128,0,0.0697813332080841
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,3072,4,0,0.7981866995493571
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,3072,2,0,1.5448692639668782
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,3072,8,0,0.42586131890614826
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,3072,16,0,0.2420426607131958
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,3072,32,0,0.15405333042144775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,3072,1,0,3.268303871154785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,3072,64,0,0.09506666660308838
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,3072,128,0,0.06654400130112965
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,4096,8,0,0.6167946656545004
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,4096,4,0,1.1655680338541667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,4096,16,0,0.3422559897104899
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,4096,2,0,3.0158507029215493
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,4096,32,0,0.2279520034790039
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,4096,64,0,0.13549333810806274
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,4096,128,0,0.10006933410962422
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,4096,1,0,6.960565567016602
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,4096,4,0,1.2130293051401775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,4096,2,0,2.45305601755778
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,4096,8,0,0.6409440040588379
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,4096,16,0,0.35306668281555176
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,4096,32,0,0.20778133471806845
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,4096,64,0,0.13451733191808066
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,4096,1,0,5.68936030069987
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,4096,128,0,0.08734400073687236
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,6144,8,0,1.1931946277618408
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,6144,4,0,2.30841064453125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,6144,16,0,0.647765318552653
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,6144,64,0,0.23042666912078857
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,6144,32,0,0.3854666550954183
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,6144,2,0,7.442298889160156
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,6144,128,0,0.15414933363596597
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,6144,4,0,2.255349318186442
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,6144,8,0,1.1534079710642497
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,6144,2,0,4.844101270039876
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,6144,1,0,14.695045471191406
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,6144,16,0,0.6213440100351969
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,6144,32,0,0.3591359853744507
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,6144,64,0,0.22027732928593954
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,6144,128,0,0.14638933539390564
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,6144,1,0,11.542352040608725
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,8,8192,8,0,1.982367992401123
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,8,8192,4,0,6.811013539632161
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,8,8192,16,0,1.0559840202331543
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,8,8192,32,0,0.624234676361084
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,8,8192,128,0,0.23289066553115845
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,8,8192,64,0,0.3541066646575928
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,8,8192,2,0,13.226479848225912
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,8,8192,4,0,4.055914560953776
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,8,8192,2,0,9.465642929077148
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,8,8192,8,0,1.8135466575622559
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,8,8192,16,0,0.9647839864095052
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,8,8192,32,0,0.5396639903386434
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,8,8192,64,0,0.32706133524576825
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,1,1,0,0.056128000219662987
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,8,8192,128,0,0.2103360096613566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,8,8192,1,0,27.299967447916668
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,1,2,0,0.035360001027584076
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,1,4,0,0.023397333920001984
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,1,8,0,0.021295999487241108
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,1,32,0,0.014858666807413101
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,1,16,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,1,64,0,0.015365333606799444
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,1,128,0,0.014805333067973455
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1,1,0,0.05601066847642263
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1,2,0,0.035674666364987694
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1,4,0,0.02741866558790207
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1,8,0,0.023168000082174938
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,8,8192,1,0,18.511520385742188
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1,32,0,0.019066666563351948
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1,16,0,0.021018666525681812
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1,64,0,0.019130667050679524
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,16,1,0,0.05825066566467285
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,16,4,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,16,2,0,0.03577066709597906
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1,128,0,0.01775466650724411
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,16,8,0,0.01931200052301089
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,16,16,0,0.015967999895413715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,16,32,0,0.015333333363135656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,16,64,0,0.015397333850463232
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,16,128,0,0.01516266663869222
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,16,1,0,0.06243733565012614
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,16,2,0,0.0395413339138031
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,16,4,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,16,8,0,0.023103999594847362
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,16,16,0,0.022831998765468597
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,16,32,0,0.021061333517233532
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,16,64,0,0.01959466685851415
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,16,128,0,0.02178666740655899
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,32,1,0,0.06595199803511302
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,32,2,0,0.03843733419974645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,32,4,0,0.02805333336194356
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,32,8,0,0.02024000013868014
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,32,16,0,0.01752000053723653
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,32,32,0,0.015381333728631338
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,32,64,0,0.015615999698638916
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,32,128,0,0.01982933282852173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,32,1,0,0.08175999919573466
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,32,2,0,0.04417600234349569
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,32,4,0,0.030933332939942677
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,32,8,0,0.025749333202838898
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,32,16,0,0.021551998953024547
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,32,32,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,32,64,0,0.021375998854637146
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,32,128,0,0.02141333371400833
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,64,1,0,0.07646400233109792
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,64,2,0,0.045642669002215065
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,64,4,0,0.02962133288383484
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,64,8,0,0.021829334398110706
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,64,16,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,64,32,0,0.01532799998919169
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,64,64,0,0.017301333447297413
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,64,128,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,64,1,0,0.113946666320165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,64,2,0,0.06439466774463654
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,64,4,0,0.03536533315976461
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,64,16,0,0.021146667500336964
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,64,8,0,0.02738133321205775
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,64,32,0,0.021055998901526134
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,64,64,0,0.022917332748572033
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,64,128,0,0.021344001094500225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,128,1,0,0.10174399614334106
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,128,2,0,0.05878399809201559
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,128,4,0,0.03425599883000056
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,128,8,0,0.025231999655564625
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,128,16,0,0.01757866640885671
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,128,32,0,0.0191040001809597
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,128,64,0,0.017418666432301205
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,128,128,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,128,1,0,0.17189333836237589
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,128,2,0,0.09783466657002766
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,128,4,0,0.0562666654586792
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,128,8,0,0.03200000027815501
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,128,16,0,0.023989332218964893
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,128,32,0,0.02293866624434789
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,128,64,0,0.021087999145189922
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,128,128,0,0.021029333273569744
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,256,1,0,0.18453866243362427
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,256,2,0,0.0995786686738332
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,256,4,0,0.06011199951171875
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,256,8,0,0.033887999753157295
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,256,16,0,0.023381332556406658
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,256,32,0,0.021712000171343487
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,256,64,0,0.0223786657055219
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,256,128,0,0.021082667013009388
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,256,1,0,0.30510934193929035
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,256,2,0,0.16312533617019653
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,256,4,0,0.09490666786829631
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,256,8,0,0.05407999952634176
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,256,16,0,0.03002133220434189
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,256,64,0,0.02550400048494339
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,256,32,0,0.027488000690937042
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,256,128,0,0.023813332120577495
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,512,1,0,0.39414934317270917
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,512,2,0,0.20856000979741415
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,512,8,0,0.06771733363469441
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,512,4,0,0.11557867129643758
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,512,16,0,0.04452266792456309
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,512,32,0,0.02741866558790207
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,512,64,0,0.02569066733121872
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,512,128,0,0.025706666211287182
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,512,1,0,0.6275200049082438
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,512,2,0,0.3308853308359782
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,512,4,0,0.17839467525482178
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,512,8,0,0.10171733299891154
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,512,16,0,0.06225599845250448
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,512,32,0,0.03541333228349686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,512,64,0,0.03160533308982849
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,512,128,0,0.029552000264326733
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,1024,2,0,0.5136693318684896
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,1024,4,0,0.2728106578191121
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,1024,8,0,0.15181333820025125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,1024,1,0,0.9836906592051188
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,1024,32,0,0.05874133110046387
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,1024,16,0,0.09187733133633931
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,1024,64,0,0.0377866675456365
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,1024,128,0,0.03426666557788849
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1024,4,0,0.377461314201355
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1024,8,0,0.20517333348592123
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1024,2,0,0.7251253128051758
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1024,1,0,1.4063146909077961
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1024,32,0,0.07504533231258392
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1024,16,0,0.11971732974052429
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1024,64,0,0.045279999574025474
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1024,128,0,0.038592000802357994
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,1536,4,0,0.4815359910329183
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,1536,2,0,0.9018666744232178
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,1536,1,0,2.2123252550760903
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,1536,8,0,0.26364799340566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,1536,16,0,0.15493333339691162
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,1536,32,0,0.09583999713261922
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,1536,64,0,0.06597333153088887
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,1536,128,0,0.04830400149027506
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,1536,4,0,0.6164746681849161
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,1536,2,0,1.1889920234680176
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,1536,1,0,2.3499840100606284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,1536,8,0,0.3245120048522949
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,1536,16,0,0.18408000469207764
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,1536,32,0,0.11521066228548686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,1536,64,0,0.07474133372306824
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,1536,128,0,0.04823466638724009
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,2048,4,0,0.7422506809234619
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,2048,8,0,0.3924746513366699
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,2048,2,0,1.4008533159891765
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,2048,16,0,0.22585066159566244
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,2048,32,0,0.1346826652685801
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,2048,64,0,0.10243200262387593
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,2048,1,0,3.5159521102905273
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,2048,128,0,0.0652213344971339
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,2048,8,0,0.4697173436482747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,2048,4,0,0.894442637761434
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,2048,2,0,1.7293225924173992
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,2048,16,0,0.2597973346710205
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,2048,32,0,0.1569546659787496
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,2048,1,0,3.4202454884847007
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,2048,128,0,0.06431999802589417
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,2048,64,0,0.0979306697845459
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,3072,8,0,0.7273973623911539
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,3072,4,0,1.4033172925313313
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,3072,16,0,0.40110401312510174
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,3072,32,0,0.23585599660873413
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,3072,2,0,3.7882560094197593
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,3072,64,0,0.14800000190734863
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,3072,128,0,0.10431999961535136
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,3072,1,0,8.226032257080078
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,3072,4,0,1.5610186258951824
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,3072,2,0,3.4697707494099936
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,3072,16,0,0.44496532281239826
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,3072,8,0,0.8028746445973715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,3072,32,0,0.2985333402951558
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,3072,64,0,0.15477333466211954
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,3072,128,0,0.10418666402498881
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,3072,1,0,7.286080042521159
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,16,4096,4,0,2.216602643330892
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,16,4096,8,0,1.1584853331247966
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,16,4096,16,0,0.6273653507232666
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,16,4096,32,0,0.3558613459269206
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,16,4096,64,0,0.22022932767868042
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,16,4096,128,0,0.15044266978899637
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,16,4096,2,0,7.196591695149739
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,16,4096,4,0,2.4173547426859536
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,16,4096,2,0,5.4281972249348955
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,16,4096,8,0,1.2317386468251545
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,16,4096,1,0,14.727877298990885
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,16,4096,32,0,0.3641386826833089
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,16,4096,16,0,0.6541813214619955
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,16,4096,128,0,0.14409066239992777
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,1,1,0,0.0956106682618459
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,16,4096,64,0,0.22114666302998862
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,1,2,0,0.0543146679798762
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,1,4,0,0.03401066611210505
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,1,8,0,0.025311999022960663
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,1,16,0,0.02347733328739802
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,16,4096,1,0,10.634848276774088
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,1,32,0,0.01741333305835724
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,1,64,0,0.017114666601022083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,1,128,0,0.016458666572968166
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1,2,0,0.05658133327960968
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1,4,0,0.03771200031042099
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,1,1,0,0.09549867113431294
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1,8,0,0.027461332579453785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1,16,0,0.02367999901374181
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1,32,0,0.019391999890406925
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1,64,0,0.019413333386182785
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,1,128,0,0.019141333798567455
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,16,1,0,0.10781866312026978
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,16,2,0,0.0588319996992747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,16,8,0,0.025194667279720306
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,16,4,0,0.03615466753641764
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,16,16,0,0.019194666296243668
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,16,32,0,0.015594666202863058
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,16,128,0,0.01515199989080429
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,16,64,0,0.016586666305859882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,16,1,0,0.11824533343315125
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,16,2,0,0.062368000547091164
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,16,4,0,0.039887999494870506
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,16,8,0,0.029050665597120922
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,16,16,0,0.023498666783173878
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,16,32,0,0.02179199953873952
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,16,128,0,0.019424000134070713
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,16,64,0,0.020031999796628952
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,32,1,0,0.11430399616559346
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,32,2,0,0.06549866497516632
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,32,4,0,0.037962667644023895
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,32,8,0,0.027984000742435455
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,32,16,0,0.019925333559513092
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,32,32,0,0.016634666671355564
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,32,64,0,0.016837333639462788
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,32,128,0,0.015024000157912573
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,32,1,0,0.14497066537539163
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,32,2,0,0.08183466891447704
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,32,4,0,0.04387733340263367
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,32,8,0,0.03199466566244761
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,32,16,0,0.023647998770078022
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,32,32,0,0.021541332205136616
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,32,64,0,0.021269333859284718
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,32,128,0,0.019738666713237762
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,64,1,0,0.13847999771436056
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,64,2,0,0.07587733368078868
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,64,4,0,0.046394666035970054
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,64,8,0,0.029440000653266907
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,64,16,0,0.021210665504137676
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,64,32,0,0.01747200017174085
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,64,64,0,0.016074666132529575
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,64,128,0,0.015354666858911514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,64,1,0,0.20706133047739664
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,64,2,0,0.11473600069681804
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,64,4,0,0.06348266700903575
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,64,8,0,0.03610666592915853
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,64,16,0,0.027552001178264618
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,64,32,0,0.022837333381175995
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,64,64,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,64,128,0,0.02130666623512904
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,128,1,0,0.18989866971969604
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,128,2,0,0.10196266571680705
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,128,4,0,0.05952000121275584
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,128,8,0,0.0356480007370313
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,128,16,0,0.023711999257405598
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,128,32,0,0.019589333484570186
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,128,64,0,0.019023999571800232
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,128,128,0,0.017024000485738117
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,128,1,0,0.3201120098431905
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,128,2,0,0.17390400171279907
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,128,4,0,0.09805333614349365
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,128,8,0,0.05705599983533224
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,128,16,0,0.032416000962257385
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,128,64,0,0.02161066730817159
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,128,32,0,0.023610666394233704
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,128,128,0,0.0223786657055219
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,256,2,0,0.1858666737874349
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,256,4,0,0.10167466600735982
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,256,1,0,0.3481066624323527
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,256,8,0,0.06246933341026306
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,256,16,0,0.03589866558710734
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,256,32,0,0.024901332954565685
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,256,128,0,0.021920000513394673
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,256,64,0,0.023311999936898548
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,256,1,0,0.5882986783981323
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,256,8,0,0.09604799747467041
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,256,4,0,0.1649386684099833
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,256,2,0,0.3059626619021098
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,256,16,0,0.057328000664711
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,256,64,0,0.02720000098148982
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,256,32,0,0.03160533308982849
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,256,128,0,0.025413334369659424
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,512,8,0,0.11924266815185547
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,512,4,0,0.21435733636220297
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,512,1,0,0.760586659113566
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,512,2,0,0.39563198884328205
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,512,16,0,0.07190933326880138
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,512,32,0,0.04790933430194855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,512,64,0,0.03081600119670232
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,512,128,0,0.027327999472618103
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,512,8,0,0.17976532379786173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,512,4,0,0.3312106728553772
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,512,2,0,0.6355146567026774
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,512,1,0,1.2359253565470378
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,512,16,0,0.10593600074450175
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,512,32,0,0.06607999900976817
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,512,64,0,0.03980266551176707
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,512,128,0,0.03349866718053818
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,1024,2,0,0.9960693518320719
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,1024,8,0,0.27932266394297284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,1024,4,0,0.5208319822947184
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,1024,1,0,2.138271967569987
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,1024,16,0,0.16285866498947144
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,1024,32,0,0.0993386705716451
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,1024,64,0,0.07132799923419952
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,1024,128,0,0.050069332122802734
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1024,8,0,0.38181865215301514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1024,4,0,0.7304746309916178
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1024,2,0,1.4192266464233398
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1024,32,0,0.12634666760762533
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1024,16,0,0.21305066347122192
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1024,64,0,0.08133333424727122
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,1024,1,0,2.788421312967936
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,1024,128,0,0.05436266462008158
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,1536,8,0,0.4920479853947957
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,1536,4,0,0.9177599747975668
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,1536,2,0,2.1220213572184243
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,1536,16,0,0.27292267481486004
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,1536,32,0,0.16375466187795004
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,1536,64,0,0.10700800021489461
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,1536,128,0,0.07739733159542084
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,1536,1,0,5.275466601053874
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,1536,4,0,1.2032426993052165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,1536,2,0,2.3634026845296225
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,1536,8,0,0.622650663057963
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,1536,16,0,0.3364693323771159
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,1536,1,0,4.766613324483235
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,1536,32,0,0.19285333156585693
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,1536,64,0,0.12239999572436015
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,1536,128,0,0.09064533313115437
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,32,2048,4,0,1.4168532689412434
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,32,2048,2,0,4.322106679280599
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,32,2048,16,0,0.40749335289001465
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,32,2048,8,0,0.764469305674235
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,32,2048,32,0,0.23649599154790243
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,32,2048,64,0,0.14919466773668924
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,32,2048,128,0,0.11288000146547954
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,32,2048,1,0,9.019402821858725
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,32,2048,4,0,1.7615893681844075
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,32,2048,8,0,0.9088053703308105
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,32,2048,16,0,0.4893706639607747
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,32,2048,2,0,3.6824000676472983
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,32,2048,32,0,0.2739359935124715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,32,2048,64,0,0.16582399606704712
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,1,1,0,0.17680533727010092
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,32,2048,128,0,0.10964266459147136
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,1,2,0,0.097680002450943
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,1,4,0,0.05599466462930044
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,1,8,0,0.033520000676314034
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,1,16,0,0.025370667378107708
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,32,2048,1,0,7.766917546590169
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,1,64,0,0.017125333348910015
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,1,32,0,0.01982933282852173
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,1,128,0,0.015647999942302704
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,1,1,0,0.1734293301900228
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,1,4,0,0.05622399846712748
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,1,2,0,0.09525332848230998
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,1,8,0,0.03800000001986822
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,1,16,0,0.02752000093460083
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,1,64,0,0.019280000279347103
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,1,32,0,0.023589332898457844
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,1,128,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,16,1,0,0.197434663772583
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,16,4,0,0.058965335289637245
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,16,8,0,0.03667200108369192
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,16,2,0,0.10731200377146403
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,16,16,0,0.025519999365011852
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,16,32,0,0.019551999866962433
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,16,128,0,0.015274666249752045
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,16,64,0,0.016437333077192307
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,16,2,0,0.1183733344078064
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,16,1,0,0.2174453337987264
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,16,8,0,0.03979733337958654
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,16,4,0,0.06247466802597046
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,16,32,0,0.02348800003528595
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,16,16,0,0.029157333076000214
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,16,64,0,0.021136000752449036
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,16,128,0,0.021429332594076794
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,32,2,0,0.11597333351771037
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,32,1,0,0.21620800097783408
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,32,4,0,0.06685866912206014
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,32,8,0,0.038549333810806274
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,32,16,0,0.027829334139823914
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,32,32,0,0.021744000415007275
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,32,64,0,0.017290666699409485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,32,128,0,0.017194667210181553
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,32,2,0,0.14639467000961304
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,32,1,0,0.2689173420270284
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,32,8,0,0.04424533247947693
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,32,4,0,0.08118399977684021
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,32,16,0,0.03172266731659571
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,32,32,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,32,128,0,0.01970133309563001
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,32,64,0,0.02123733361562093
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,64,2,0,0.13783466815948486
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,64,1,0,0.26446932554244995
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,64,4,0,0.07713599999745686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,64,8,0,0.04552533229192098
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,64,16,0,0.029605334003766377
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,64,32,0,0.022170667846997578
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,64,128,0,0.017258666455745697
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,64,64,0,0.01788266624013583
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,64,2,0,0.20869867006937662
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,64,1,0,0.39137065410614014
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,64,8,0,0.06533866624037425
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,64,4,0,0.11564800143241882
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,64,16,0,0.03807999938726425
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,64,32,0,0.027632000545660656
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,64,64,0,0.023391999304294586
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,64,128,0,0.021498667697111767
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,128,2,0,0.1925440033276876
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,128,1,0,0.36166401704152423
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,128,8,0,0.060496002435684204
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,128,4,0,0.10430933038393657
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,128,16,0,0.03835200021664301
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,128,32,0,0.025786665578683216
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,128,64,0,0.020090666910012562
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,128,128,0,0.02045866722861926
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,128,1,0,0.6253919998804728
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,128,2,0,0.32494932413101196
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,128,8,0,0.10105066498120625
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,128,4,0,0.17541333039601645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,128,16,0,0.0609440008799235
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,128,32,0,0.033344000577926636
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,128,64,0,0.02624533325433731
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,128,128,0,0.024421334266662598
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,256,2,0,0.35448535283406574
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,256,4,0,0.18907199303309122
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,256,1,0,0.6815040111541748
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,256,8,0,0.10624000430107117
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,256,16,0,0.06637333333492279
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,256,32,0,0.04201066493988037
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,256,128,0,0.025306666890780132
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,256,64,0,0.025770666698614757
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,256,1,0,1.1595679918924968
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,256,4,0,0.31034666299819946
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,256,2,0,0.5956000089645386
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,256,8,0,0.16749332348505655
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,256,16,0,0.10102400183677673
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,256,32,0,0.06140799820423126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,256,64,0,0.03573866685231527
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,256,128,0,0.02976000060637792
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,512,4,0,0.40481066703796387
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,512,2,0,0.7772640387217203
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,512,1,0,1.493834654490153
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,512,8,0,0.2201333244641622
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,512,16,0,0.1283253331979116
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,512,32,0,0.08211733400821686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,512,64,0,0.058693334460258484
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,512,128,0,0.043477331598599754
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,512,8,0,0.3348746697107951
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,512,4,0,0.6454399824142456
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,512,2,0,1.261344035466512
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,512,16,0,0.187008003393809
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,512,1,0,2.469871997833252
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,512,32,0,0.11170132954915364
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,512,64,0,0.07281599938869476
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,512,128,0,0.04785599807898203
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,64,1024,4,0,1.009658654530843
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,64,1024,8,0,0.5292213360468546
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,64,1024,2,0,2.1288426717122397
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,64,1024,32,0,0.176639993985494
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,64,1024,16,0,0.29602134227752686
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,64,1024,64,0,0.13777066270510355
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,64,1024,128,0,0.08504000306129456
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,64,1024,1,0,5.783440272013347
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,64,1024,4,0,1.4484693209330242
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,64,1024,2,0,2.8519519170125327
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,64,1024,8,0,0.7436160246531168
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,64,1024,16,0,0.4000320037206014
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,64,1024,32,0,0.22670932610829672
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,64,1024,64,0,0.137472003698349
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,64,1024,128,0,0.09266133109728496
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,64,1024,1,0,6.051328023274739
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,1,4,0,0.09620267152786255
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,1,1,0,0.33928533395131427
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,1,2,0,0.1779680053393046
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,1,32,0,0.025253333151340485
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,1,16,0,0.03419200082619985
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,1,8,0,0.056464001536369324
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,1,64,0,0.02128000060717265
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,1,128,0,0.01704000060757001
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,1,2,0,0.17414933443069458
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,1,4,0,0.09788266817728679
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,1,1,0,0.3290506601333618
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,1,8,0,0.05839466551939646
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,1,16,0,0.03770133356253306
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,1,32,0,0.02959999938805898
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,1,64,0,0.02327466756105423
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,1,128,0,0.021189334491888683
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,16,1,0,0.3825920025507609
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,16,4,0,0.10593066612879436
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,16,2,0,0.19843733310699463
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,16,8,0,0.06325866778691609
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,16,16,0,0.037808001041412354
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,16,32,0,0.025498665869235992
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,16,64,0,0.019258666783571243
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,16,128,0,0.017237332959969837
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,16,1,0,0.40986132621765137
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,16,4,0,0.11778666575749715
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,16,8,0,0.062352001667022705
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,16,2,0,0.21654399236043295
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,16,16,0,0.03992533435424169
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,16,64,0,0.02332799881696701
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,16,128,0,0.02124800036350886
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,16,32,0,0.029919999341169994
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,32,8,0,0.06769066552321117
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,32,4,0,0.11620266238848369
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,32,2,0,0.2140106757481893
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,32,1,0,0.41208000977834064
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,32,16,0,0.03941866755485535
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,32,32,0,0.027456000447273254
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,32,64,0,0.021114667256673176
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,32,128,0,0.017077332983414333
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,32,2,0,0.2702453335126241
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,32,8,0,0.08311999837557475
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,32,1,0,0.515941341718038
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,32,4,0,0.14496533075968424
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,32,16,0,0.0461706668138504
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,32,64,0,0.025424001117547352
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,32,128,0,0.02149333308140437
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,32,32,0,0.0317546675602595
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,64,1,0,0.5140693187713623
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,64,4,0,0.1389173368612925
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,64,2,0,0.26702932516733807
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,64,8,0,0.07852800190448761
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,64,16,0,0.048245335618654885
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,64,32,0,0.031770666440327965
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,64,64,0,0.023189333577950794
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,64,128,0,0.019354666272799175
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,64,1,0,0.7638773123423258
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,64,2,0,0.3963786760965983
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,64,4,0,0.21040532986323038
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,64,8,0,0.11839999755223592
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,64,16,0,0.0683840016523997
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,64,32,0,0.03818666686614355
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,64,64,0,0.02757333219051361
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,64,128,0,0.0249439999461174
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,128,2,0,0.36774933338165283
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,128,1,0,0.7003040313720703
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,128,4,0,0.1957333286603292
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,128,8,0,0.1079306701819102
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,128,16,0,0.06558933357397716
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,128,32,0,0.04274133344491323
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,128,64,0,0.03052799900372823
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,128,128,0,0.023141334454218548
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,128,1,0,1.237562656402588
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,128,2,0,0.6324106852213541
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,128,4,0,0.3284533421198527
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,128,32,0,0.06214400132497152
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,128,16,0,0.1034453312555949
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,128,8,0,0.17934934298197427
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,128,128,0,0.027482666075229645
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,128,64,0,0.03693866729736328
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,256,4,0,0.3579839865366618
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,256,1,0,1.3481334050496419
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,256,2,0,0.682528018951416
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,256,8,0,0.1955946683883667
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,256,16,0,0.11384532848993938
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,256,32,0,0.0746559997399648
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,256,64,0,0.05211733281612396
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,256,128,0,0.03921066721280416
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,256,4,0,0.6005813280741373
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,256,8,0,0.3142613371213277
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,256,2,0,1.1752320130666096
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,256,1,0,2.2987839380900064
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,256,16,0,0.1755359967549642
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,256,32,0,0.10711999734242757
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,256,64,0,0.06887466708819072
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,256,128,0,0.046469335754712425
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,128,512,4,0,0.7858719825744629
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,128,512,2,0,1.5245119730631511
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,128,512,8,0,0.42126933733622235
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,128,512,16,0,0.236842672030131
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,128,512,32,0,0.14340266585350037
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,128,512,64,0,0.09758399923642476
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,128,512,1,0,2.9933385848999023
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,128,512,128,0,0.07391466697057088
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,128,512,4,0,1.2828426361083984
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,128,512,8,0,0.6563200155893961
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,128,512,2,0,2.5249013900756836
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,128,512,32,0,0.19992534319559732
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,128,512,16,0,0.35556264718373615
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,128,512,64,0,0.12388267119725545
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,128,512,128,0,0.08585600058237712
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,128,512,1,0,5.00820795694987
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,1,2,0,0.3498186667760213
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,1,1,0,0.6686453024546305
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,1,4,0,0.18306666612625122
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,1,32,0,0.035631999373435974
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,1,16,0,0.05756799876689911
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,1,8,0,0.1009173293908437
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,1,64,0,0.025722667574882507
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,1,128,0,0.019285333653291065
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,1,2,0,0.33290666341781616
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,1,1,0,0.6440800031026205
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,1,4,0,0.1757813294728597
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,1,8,0,0.09927999973297119
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,1,16,0,0.06043200194835663
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,1,64,0,0.03186133255561193
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,1,32,0,0.04020266731580099
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,1,128,0,0.02533866713444392
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,16,1,0,0.7318666776021322
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,16,4,0,0.19880000750223795
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,16,8,0,0.10873599847157796
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,16,2,0,0.37379733721415204
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,16,32,0,0.03764266769091288
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,16,16,0,0.06049066781997681
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,16,64,0,0.027530667682488758
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,16,128,0,0.021498667697111767
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,16,2,0,0.41196266810099286
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,16,4,0,0.216922660668691
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,16,1,0,0.7982347011566162
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,16,8,0,0.11891733606656392
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,16,16,0,0.06461333235104878
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,16,32,0,0.03957333415746689
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,16,128,0,0.025439999997615814
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,16,64,0,0.030991998811562855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,32,1,0,0.8073386351267496
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,32,8,0,0.11892799536387126
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,32,4,0,0.2169546683629354
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,32,2,0,0.41196266810099286
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,32,16,0,0.06934399902820587
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,32,32,0,0.04009066770474116
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,32,64,0,0.029877332349618275
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,32,128,0,0.025274666647116344
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,32,2,0,0.5186346769332886
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,32,1,0,1.0126187006632488
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,32,4,0,0.27326399087905884
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,32,8,0,0.14878400166829428
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,32,32,0,0.04770133395989736
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,32,16,0,0.08516266942024231
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,32,64,0,0.033285332222779594
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,32,128,0,0.027999999622503918
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,64,1,0,1.0120373566945393
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,64,4,0,0.27077333132425946
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,64,2,0,0.5200373331705729
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,64,8,0,0.14732266465822855
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,64,16,0,0.08541867136955261
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,64,32,0,0.05230399966239929
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,64,64,0,0.03634133438269297
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,64,128,0,0.027562665442625683
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,64,4,0,0.400762677192688
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,64,2,0,0.7744853496551514
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,64,8,0,0.21273066600163779
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,64,1,0,1.5275306701660156
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,64,16,0,0.12075733145078023
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,64,64,0,0.04429866870244344
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,64,32,0,0.07318933308124542
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,64,128,0,0.03212266663710276
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,128,4,0,0.3789920012156169
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,128,8,0,0.20507733027140299
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,128,2,0,0.7144213517506918
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,128,1,0,1.387605349222819
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,128,16,0,0.11741333206494649
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,128,32,0,0.07374933362007141
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,128,64,0,0.0525546669960022
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,128,128,0,0.041477332512537636
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,128,8,0,0.33714667956034344
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,128,4,0,0.6382026672363281
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,128,2,0,1.256229321161906
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,128,1,0,2.4728639920552573
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,128,16,0,0.18548800547917685
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,128,32,0,0.10777599612871806
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,128,64,0,0.07082133491834004
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,128,128,0,0.048112000028292336
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,32,256,256,4,0,0.6877600351969401
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,64,256,256,2,0,1.3399732907613118
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,16,256,256,8,0,0.37112534046173096
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,8,256,256,16,0,0.21053866545359293
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,128,256,256,1,0,2.655285358428955
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,2,256,256,64,0,0.09231467048327129
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,4,256,256,32,0,0.1290880044301351
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,float16,1,256,256,128,0,0.06796266635258992
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,16,256,256,8,0,0.6180426677068075
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,32,256,256,4,0,1.199893315633138
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,8,256,256,16,0,0.33342401186625165
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,64,256,256,2,0,2.350378672281901
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,4,256,256,32,0,0.18986666202545166
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,2,256,256,64,0,0.11948800086975098
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,1,256,256,128,0,0.08080000181992848
TRTLLM,1.1.0,NVIDIA GB200,mla_context,default,float16,fp8,128,256,256,1,0,4.669727961222331
