framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,1,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,1,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,1,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,1,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,1,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,1,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,1,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,1,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,1,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,1,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,1,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,3,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,3,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,3,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,3,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,3,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,3,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,3,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,3,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,3,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,3,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,3,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,3,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,3,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,3,0.010832000523805618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,7,0.011962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,7,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,7,0.012495999534924826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,7,0.011594666788975397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,7,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,7,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,7,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,7,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,15,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,7,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,15,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,15,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,15,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,15,0.01081066702802976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,7,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,15,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,15,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,15,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,15,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,15,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,15,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,15,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,15,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,15,0.01191466674208641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,31,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,31,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,31,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,31,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,31,0.011893333246310553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,31,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,31,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,31,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,31,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,31,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,31,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,31,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,31,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,31,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,63,0.011050666371981302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,63,0.011941333611806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,63,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,63,0.011823999385039011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,63,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,63,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,63,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,63,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,63,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,63,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,63,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,63,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,127,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,127,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,127,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,127,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,127,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,127,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,127,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,127,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,127,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,127,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,127,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,127,0.010751999914646149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,127,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,127,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,255,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,255,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,255,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,255,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,255,0.011541333049535751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,255,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,255,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,255,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,255,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,255,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,255,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,255,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,255,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,255,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,255,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,511,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,511,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,511,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,511,0.013839999834696451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,511,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,511,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,511,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,511,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,511,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,511,0.01357866699496905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,511,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,511,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,511,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,511,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,511,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,1023,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,511,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,1023,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,1023,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,1023,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,1023,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,1023,0.013514666507641474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,1023,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,1023,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,1023,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,1023,0.013834666460752487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,1023,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,1023,0.01393066719174385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,1023,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,1023,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,1023,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,1023,0.013728000223636627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,2047,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,2047,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,2047,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,2047,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,2047,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,2047,0.01469333345691363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,2047,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,2047,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,2047,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,2047,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,2047,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,2047,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,2047,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,2047,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,2047,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,2047,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,4095,0.019893333315849304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,4095,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,4095,0.01659199967980385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,4095,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,4095,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,4095,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,4095,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,4095,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,4095,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,4095,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,4095,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,4095,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,4095,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,4095,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,4095,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,4095,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,8191,0.026005332668622334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,8191,0.019802667200565338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,8191,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,8191,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,8191,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,8191,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,8191,0.018383999665578205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,8191,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,8191,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,8191,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,8191,0.01794133335351944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,8191,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,8191,0.017765333255132038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,8191,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,8191,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,8191,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,16383,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,16383,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,16383,0.023631999890009563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,16383,0.02274133265018463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,16383,0.02290133386850357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,16383,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,16383,0.02277333289384842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,16383,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,16383,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,16383,0.021903999149799347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,16383,0.021669333179791767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,16383,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,16383,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,16383,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,16383,0.019802667200565338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,16383,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,32767,0.03612799942493439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,32767,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,32767,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,32767,0.027669332921504974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,32767,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,32767,0.02672533442576726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,32767,0.026586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,32767,0.027237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,32767,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,32767,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,32767,0.02422933280467987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,32767,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,32767,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,32767,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,32767,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,32767,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,65535,0.0376800000667572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,65535,0.042223999897638954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,65535,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,65535,0.03612799942493439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,65535,0.03387733300526937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,65535,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,65535,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,65535,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,65535,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,65535,0.03355200091997782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,65535,0.02978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,65535,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,65535,0.027994667490323383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,65535,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,65535,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,65535,0.028351999819278717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1,1,1,131071,0.059952000776926674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1,1,4,131071,0.06890133519967397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1,1,8,131071,0.053957333167394005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1,1,2,131071,0.06287999947865804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1,1,16,131071,0.053258667389551796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1,1,64,131071,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1,1,32,131071,0.05637866755326589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1,1,128,131071,0.056746666630109154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1,1,1,131071,0.039962666730086006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1,1,2,131071,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1,1,4,131071,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1,1,8,131071,0.034234667817751564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1,1,16,131071,0.03249066571394602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1,1,32,131071,0.03408533334732056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1,1,64,131071,0.03402133285999298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1,1,128,131071,0.03366400053103765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,1,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,1,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,1,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,1,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,1,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,1,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,1,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,1,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,1,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,1,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,1,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,3,0.012389333297808966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,3,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,3,0.010757333288590113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,3,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,3,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,3,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,3,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,3,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,3,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,3,0.011301333705584208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,3,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,3,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,7,0.011685332904259363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,7,0.012186666329701742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,7,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,7,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,7,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,7,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,7,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,7,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,7,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,7,0.010965333630641302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,7,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,7,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,7,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,7,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,7,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,15,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,15,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,15,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,15,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,15,0.011744000017642975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,15,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,15,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,15,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,15,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,15,0.011429333438475927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,15,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,15,0.010784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,15,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,15,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,31,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,31,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,31,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,31,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,31,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,31,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,31,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,31,0.01180800050497055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,31,0.011440000186363855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,31,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,31,0.01192533348997434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,31,0.011509332805871964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,31,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,31,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,63,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,63,0.012309333930412928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,63,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,63,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,63,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,63,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,63,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,63,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,63,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,63,0.01137599969903628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,63,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,63,0.011594666788975397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,63,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,63,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,127,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,127,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,127,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,127,0.011514666179815928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,127,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,127,0.010944000134865442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,127,0.012229333321253458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,127,0.012122667084137598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,127,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,127,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,127,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,127,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,255,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,255,0.012341332932313284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,255,0.011157333850860596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,255,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,255,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,255,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,255,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,255,0.012165332833925882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,255,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,255,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,255,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,255,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,255,0.01099733387430509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,511,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,511,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,511,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,511,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,511,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,511,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,511,0.012655999511480331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,511,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,511,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,511,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,511,0.012533333152532578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,511,0.011642667154471079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,511,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,511,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,511,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,1023,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,1023,0.014698666830857595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,1023,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,1023,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,1023,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,1023,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,1023,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,1023,0.013354666531085968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,1023,0.014218666901191076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,1023,0.013514666507641474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,1023,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,1023,0.013455999394257864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,1023,0.012058666596810022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,1023,0.010901333143313726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,1023,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,1023,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,2047,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,2047,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,2047,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,2047,0.013237333546082178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,2047,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,2047,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,2047,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,2047,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,2047,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,2047,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,2047,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,2047,0.011871999750534693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,2047,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,2047,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,2047,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,4095,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,4095,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,4095,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,4095,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,4095,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,4095,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,4095,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,4095,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,4095,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,4095,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,4095,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,4095,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,4095,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,4095,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,4095,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,4095,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,8191,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,8191,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,8191,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,8191,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,8191,0.017952000101407368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,8191,0.018266666680574417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,8191,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,8191,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,8191,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,8191,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,8191,0.01800000046690305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,8191,0.01836266616980235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,8191,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,8191,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,8191,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,16383,0.027658666173617046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,8191,0.017749333133300144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,16383,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,16383,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,16383,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,16383,0.024149333437283833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,16383,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,16383,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,16383,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,16383,0.027658666173617046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,16383,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,16383,0.022122666239738464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,16383,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,16383,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,16383,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,16383,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,16383,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,32767,0.03268266717592875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,32767,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,32767,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,32767,0.03614933292071024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,32767,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,32767,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,32767,0.030794667700926464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,32767,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,32767,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,32767,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,32767,0.027066667874654133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,32767,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,32767,0.02605333427588145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,32767,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,32767,0.02608533451954524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,65535,0.052613332867622375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,32767,0.025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,65535,0.06071466704209646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,65535,0.0552106648683548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,65535,0.05188799897829691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,65535,0.05194666484991709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,65535,0.05328533550103506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,65535,0.05379733443260193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,65535,0.05319466690222422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,65535,0.03355200091997782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,65535,0.03610666592915853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,65535,0.03356799980004629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,65535,0.030181333422660828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,65535,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,65535,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,65535,0.02980799973011017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,65535,0.03028800090154012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,2,1,1,131071,0.07751999795436859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,2,1,2,131071,0.08573866883913676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,2,1,4,131071,0.08925867080688477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,2,1,16,131071,0.0757173349459966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,2,1,8,131071,0.07629333436489105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,2,1,32,131071,0.07708799839019775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,2,1,128,131071,0.07737066845099132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,2,1,64,131071,0.07668266693751018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,2,1,1,131071,0.052042668064435325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,2,1,2,131071,0.05714133381843567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,2,1,8,131071,0.048245335618654885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,2,1,4,131071,0.05379733443260193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,2,1,16,131071,0.04901333153247833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,2,1,32,131071,0.050154666105906166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,2,1,64,131071,0.05073066552480062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,2,1,128,131071,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,1,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,1,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,1,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,1,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,1,0.011605333536863327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,1,0.011733333269755045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,1,0.011391999820868174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,1,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,1,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,1,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,1,0.011477333803971609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,1,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,1,0.011525332927703857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,1,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,3,0.012629333883523941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,3,0.01231466606259346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,3,0.011802667131026586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,3,0.011600000162919363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,3,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,3,0.011519999553759893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,3,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,3,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,3,0.011701333026091257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,3,0.01156266654531161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,3,0.011071999867757162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,3,0.012245333443085352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,3,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,3,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,3,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,3,0.01173866664369901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,7,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,7,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,7,0.011205332974592844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,7,0.011194666226704916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,7,0.011626667032639185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,7,0.011706666400035223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,7,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,7,0.01332266628742218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,7,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,7,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,7,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,7,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,7,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,7,0.01091733326514562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,7,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,15,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,15,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,15,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,15,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,15,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,15,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,15,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,15,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,15,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,15,0.011567999919255575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,15,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,15,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,15,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,15,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,31,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,31,0.01101333275437355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,31,0.011141333729028702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,31,0.010837333897749582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,31,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,31,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,31,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,31,0.011653333902359009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,31,0.01128000020980835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,31,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,31,0.01080000028014183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,31,0.012560000022252401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,31,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,31,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,63,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,31,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,63,0.012805332740147909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,63,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,63,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,63,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,63,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,63,0.011045332998037338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,63,0.012858666479587555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,63,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,63,0.011695999652147293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,63,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,63,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,63,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,63,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,63,0.011183999478816986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,127,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,127,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,127,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,127,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,127,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,127,0.011087999989589056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,127,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,127,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,127,0.01118933285276095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,127,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,127,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,127,0.010672000547250112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,127,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,127,0.010933333386977514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,127,0.011690666278203329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,255,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,255,0.01139733319481214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,255,0.01210133358836174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,255,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,255,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,255,0.012106666962305704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,255,0.011898666620254517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,255,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,255,0.010970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,255,0.011482667177915573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,255,0.01089599976936976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,255,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,511,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,511,0.020560000091791153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,511,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,511,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,511,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,511,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,511,0.01209066684047381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,511,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,511,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,511,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,511,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,511,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,511,0.012874666601419449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,511,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,511,0.013471999516089758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,1023,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,1023,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,1023,0.014469332993030548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,1023,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,1023,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,1023,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,1023,0.011589333415031433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,1023,0.013349333157142004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,1023,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,1023,0.01370666672786077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,1023,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,1023,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,1023,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,1023,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,1023,0.011535999675591787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,1023,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,2047,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,2047,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,2047,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,2047,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,2047,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,2047,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,2047,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,2047,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,2047,0.01802666609485944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,2047,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,2047,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,2047,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,2047,0.014192000031471252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,2047,0.013781332721312841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,2047,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,2047,0.014314666390419006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,4095,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,4095,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,4095,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,4095,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,4095,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,4095,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,4095,0.01651200031240781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,4095,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,4095,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,4095,0.02083733429511388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,4095,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,4095,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,4095,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,4095,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,4095,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,4095,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,8191,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,8191,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,8191,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,8191,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,8191,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,8191,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,8191,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,8191,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,8191,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,8191,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,8191,0.019674666225910187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,8191,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,8191,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,8191,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,8191,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,8191,0.0176959993938605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,16383,0.02796799937884013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,16383,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,16383,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,16383,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,16383,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,16383,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,16383,0.026165333886941273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,16383,0.02606400102376938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,16383,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,16383,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,16383,0.024319998919963837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,16383,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,16383,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,16383,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,16383,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,16383,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,32767,0.048800001541773476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,32767,0.054661333560943604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,32767,0.05436799923578898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,32767,0.048938666780789696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,32767,0.04762133459250132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,32767,0.04877866804599762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,32767,0.04906666775544485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,32767,0.04860800007979075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,32767,0.029850666721661884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,32767,0.02972800036271413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,32767,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,32767,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,32767,0.026015999416510265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,32767,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,32767,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,32767,0.02611733227968216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,65535,0.07414933542410533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,65535,0.0804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,65535,0.08716266353925069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,65535,0.07307733098665874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,65535,0.07235733171304067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,65535,0.07291733225186665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,65535,0.07214400172233582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,65535,0.07323733468850453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,65535,0.04957333207130432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,65535,0.052389333645502724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,65535,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,65535,0.04473066826661428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,65535,0.04465066889921824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,65535,0.04353600243727366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,65535,0.04353600243727366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,65535,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,4,1,1,131071,0.12100799878438313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,4,1,2,131071,0.13201600313186646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,4,1,4,131071,0.15389866630236307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,4,1,8,131071,0.12076800068219502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,4,1,16,131071,0.1213759978612264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,4,1,32,131071,0.12119999527931213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,4,1,64,131071,0.12064533432324727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,4,1,1,131071,0.07100800176461537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,4,1,128,131071,0.12135466933250427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,4,1,2,131071,0.07479999959468842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,4,1,4,131071,0.08102933565775554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,4,1,8,131071,0.06559466818968455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,4,1,16,131071,0.06484800080458324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,4,1,32,131071,0.06613866488138835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,4,1,64,131071,0.06570133566856384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,4,1,128,131071,0.06596800188223521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,1,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,1,0.011237333218256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,1,0.012698666503032049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,1,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,1,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,1,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,1,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,1,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,1,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,1,0.011215999722480774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,1,0.010885333021481832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,1,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,1,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,3,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,3,0.01119999960064888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,3,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,3,0.01192533348997434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,3,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,3,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,3,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,3,0.011674666156371435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,3,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,3,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,3,0.012975999464591345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,3,0.011685332904259363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,3,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,3,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,3,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,3,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,7,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,7,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,7,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,7,0.011114666859308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,7,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,7,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,7,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,7,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,7,0.012847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,7,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,7,0.01157333329319954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,7,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,7,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,7,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,7,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,7,0.011359999577204386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,15,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,15,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,15,0.011055999745925268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,15,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,15,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,15,0.011077333241701126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,15,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,15,0.01340266689658165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,15,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,15,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,15,0.010768000036478043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,15,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,15,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,15,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,31,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,31,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,31,0.011402666568756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,31,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,31,0.011589333415031433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,31,0.011136000355084738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,31,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,31,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,31,0.013264000415802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,31,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,31,0.011711999773979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,31,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,31,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,31,0.010954666882753372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,31,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,31,0.010698666175206503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,63,0.012837332983811697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,63,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,63,0.012821332861979803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,63,0.011178666104873022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,63,0.011770666887362799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,63,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,63,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,63,0.011381333072980246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,63,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,63,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,63,0.011370666325092316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,63,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,63,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,63,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,63,0.010773333410422007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,63,0.010805333654085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,127,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,127,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,127,0.012826666235923767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,127,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,127,0.011434666812419891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,127,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,127,0.010879999647537867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,127,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,127,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,127,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,127,0.011173332730929056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,127,0.010911999891201654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,127,0.01097600037852923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,127,0.011530666301647821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,127,0.010735999792814255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,127,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,255,0.013541333377361298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,255,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,255,0.01116266722480456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,255,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,255,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,255,0.011653333902359009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,255,0.011231999844312668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,255,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,255,0.011493333925803503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,255,0.011285333583752314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,255,0.011226666470368704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,255,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,255,0.011002667248249054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,255,0.01121066634853681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,511,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,511,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,511,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,511,0.012800000607967377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,511,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,511,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,511,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,511,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,511,0.022890667120615642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,511,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,511,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,511,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,511,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,511,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,511,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,511,0.011722666521867117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,1023,0.020224000016848247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,1023,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,1023,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,1023,0.014565333724021912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,1023,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,1023,0.013530666629473368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,1023,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,1023,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,1023,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,1023,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,1023,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,1023,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,1023,0.01403733342885971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,1023,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,1023,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,1023,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,2047,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,2047,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,2047,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,2047,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,2047,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,2047,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,2047,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,2047,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,2047,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,2047,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,2047,0.015743999431530636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,2047,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,2047,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,2047,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,2047,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,2047,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,4095,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,4095,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,4095,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,4095,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,4095,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,4095,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,4095,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,4095,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,4095,0.02181333303451538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,4095,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,4095,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,4095,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,4095,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,4095,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,4095,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,4095,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,8191,0.027962667246659596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,8191,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,8191,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,8191,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,8191,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,8191,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,8191,0.023749334116776783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,8191,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,8191,0.02508266766866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,8191,0.024725332856178284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,8191,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,8191,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,8191,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,8191,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,8191,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,8191,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,16383,0.049914668003718056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,16383,0.052282666166623436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,16383,0.055434669057528176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,16383,0.04683733483155569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,16383,0.045909335215886436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,16383,0.0462719996770223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,16383,0.050928001602490745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,16383,0.046629334489504494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,16383,0.029994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,16383,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,16383,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,16383,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,16383,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,16383,0.024351999163627625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,16383,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,16383,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,32767,0.07420800129572551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,32767,0.07750933369000752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,32767,0.0703893353541692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,32767,0.08914132912953694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,32767,0.07015466690063477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,32767,0.06943466762701671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,32767,0.06897599995136261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,32767,0.06851199766000111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,32767,0.04833066463470459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,32767,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,32767,0.04159999887148539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,32767,0.05052266518274943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,32767,0.04062400013208389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,32767,0.04012266546487808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,32767,0.04030933231115341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,32767,0.041008000572522484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,65535,0.12361600001653035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,65535,0.12875200311342874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,65535,0.1567466656366984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,65535,0.12114666899045308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,65535,0.12085866928100586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,65535,0.12017599741617839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,65535,0.11918933192888896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,65535,0.1211893359820048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,65535,0.07309333483378093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,65535,0.07211733361085255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,65535,0.08130133152008057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,65535,0.06378666559855144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,65535,0.06368533273537953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,65535,0.06384000182151794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,65535,0.06300266583760579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,65535,0.06373333434263866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,8,1,1,131071,0.2232853372891744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,8,1,2,131071,0.24079465866088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,8,1,4,131071,0.28887999057769775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,8,1,8,131071,0.22909333308537802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,8,1,16,131071,0.22793600956598917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,8,1,32,131071,0.22805333137512207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,8,1,64,131071,0.2291146715482076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,8,1,128,131071,0.2285919984181722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,8,1,1,131071,0.12057600418726604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,8,1,2,131071,0.11621866623560588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,8,1,4,131071,0.14366933703422546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,8,1,8,131071,0.10544000069300334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,8,1,16,131071,0.10494933525721233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,8,1,32,131071,0.1051626702149709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,8,1,64,131071,0.10569066802660625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,1,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,8,1,128,131071,0.1056160032749176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,1,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,1,0.012448000411192576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,1,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,1,0.011034666250149408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,1,0.01192533348997434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,1,0.01110400011142095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,1,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,1,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,1,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,1,0.011317333827416102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,1,0.01138666644692421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,1,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,1,0.011029332876205444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,1,0.01360000049074491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,1,0.010928000013033548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,3,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,3,0.013461332768201828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,3,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,3,0.010863999525705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,3,0.01145600030819575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,3,0.012383999923865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,3,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,3,0.014677333335081736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,3,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,3,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,3,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,3,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,3,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,3,0.011242666592200598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,3,0.010981333752473196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,3,0.010960000256697336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,7,0.014602666099866232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,7,0.013157332936922709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,7,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,7,0.01228800043463707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,7,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,7,0.01126933346192042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,7,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,7,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,7,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,7,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,7,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,7,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,7,0.011146667102972666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,7,0.010874666273593903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,15,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,15,0.01310933381319046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,15,0.01320533330241839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,15,0.011781333635250727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,15,0.011461333682139715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,15,0.011727999895811081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,15,0.01102399950226148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,15,0.01166933278242747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,15,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,15,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,15,0.01090666651725769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,15,0.011717333147923151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,15,0.011253333340088526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,15,0.011018666128317514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,15,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,15,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,31,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,31,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,31,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,31,0.010992000500361124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,31,0.010858666151762009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,31,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,31,0.011349332829316458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,31,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,31,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,31,0.011120000233252844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,31,0.011616000284751257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,31,0.01109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,31,0.011125333607196808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,31,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,31,0.01073066641887029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,63,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,63,0.013594667116800943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,63,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,63,0.011365332951148352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,63,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,63,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,63,0.011450666934251785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,63,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,63,0.013983999689420065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,63,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,63,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,63,0.012410666793584824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,63,0.010890666395425797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,63,0.011264000087976456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,63,0.010826667149861654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,63,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,127,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,127,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,127,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,127,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,127,0.01202133297920227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,127,0.010869332899649939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,127,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,127,0.0116799995303154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,127,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,127,0.012634667257467905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,127,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,127,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,127,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,127,0.011557333171367645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,127,0.011152000476916632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,127,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,255,0.01370666672786077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,255,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,255,0.013482666263977686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,255,0.011413333316644033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,255,0.011354666203260422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,255,0.012298667182525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,255,0.011648000528415045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,255,0.011578666667143503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,255,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,255,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,255,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,255,0.010853332777818045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,255,0.011551999797423681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,255,0.011584000041087469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,255,0.010949333508809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,255,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,511,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,511,0.01826133330663045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,511,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,511,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,511,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,511,0.012789333860079447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,511,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,511,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,511,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,511,0.013594667116800943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,511,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,511,0.013445333888133367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,511,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,511,0.011754666765530905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,511,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,1023,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,1023,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,1023,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,1023,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,1023,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,1023,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,1023,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,1023,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,1023,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,1023,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,1023,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,1023,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,1023,0.013552000125249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,1023,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,1023,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,1023,0.020597333709398907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,2047,0.021744000415007275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,2047,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,2047,0.02863466739654541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,2047,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,2047,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,2047,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,2047,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,2047,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,2047,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,2047,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,2047,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,2047,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,2047,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,2047,0.013914667069911957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,2047,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,2047,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,4095,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,4095,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,4095,0.03105599929889043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,4095,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,4095,0.021722666919231415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,4095,0.021536000072956085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,4095,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,4095,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,4095,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,4095,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,4095,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,4095,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,4095,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,4095,0.018378666291634243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,4095,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,4095,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,8191,0.048709332942962646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,8191,0.05213866631189982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,8191,0.05381333331267039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,8191,0.04568000137805939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,8191,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,8191,0.04385599990685781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,8191,0.044666667779286705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,8191,0.044639999667803444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,8191,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,8191,0.028789333999156952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,8191,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,8191,0.02274133265018463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,8191,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,8191,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,8191,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,8191,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,16383,0.07635200023651123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,16383,0.0765226682027181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,16383,0.09006933371225993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,16383,0.07229333122571309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,16383,0.07163733243942261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,16383,0.07075199981530507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,16383,0.07161599894364674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,16383,0.07037333150704701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,16383,0.05020800232887268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,16383,0.04867733518282572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,16383,0.050053333242734276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,16383,0.04200000067551931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,16383,0.04101866732041041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,16383,0.04025600105524063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,16383,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,16383,0.03996799886226654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,32767,0.12849600116411844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,32767,0.1306773324807485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,32767,0.16344533363978067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,32767,0.12224533160527547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,32767,0.1200266679128011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,32767,0.11966933806737264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,32767,0.11910933256149292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,32767,0.11955199639002483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,32767,0.07509866853555043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,32767,0.07176533341407776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,32767,0.08373866478602092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,32767,0.06266666452089946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,32767,0.062165334820747375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,32767,0.06277866661548615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,32767,0.06168533364931742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,32767,0.06218666831652323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,65535,0.23249600330988565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,65535,0.2343733310699463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,65535,0.31092800696690875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,65535,0.22118399540583292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,65535,0.21798932552337646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,65535,0.21967999140421549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,65535,0.21790399154027304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,65535,0.21900266408920288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,65535,0.12778666615486145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,65535,0.1523413360118866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,65535,0.11717866857846577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,65535,0.10711999734242757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,65535,0.10446400443712871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,65535,0.10538666447003682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,65535,0.10590933760007222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,65535,0.1058026651541392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,16,1,1,131071,0.4410986502965291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,16,1,2,131071,0.44284268220265705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,16,1,8,131071,0.4243306716283162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,16,1,4,131071,0.6057973305384318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,16,1,16,131071,0.4198026657104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,16,1,32,131071,0.4174400170644124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,16,1,64,131071,0.42218132813771564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,16,1,128,131071,0.42426665623982746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,16,1,1,131071,0.23029865821202597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,16,1,2,131071,0.20884267489115396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,16,1,4,131071,0.28912534316380817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,16,1,8,131071,0.19309866428375244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,16,1,16,131071,0.19131733973821005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,16,1,32,131071,0.19191465775171915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,1,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,16,1,64,131071,0.19150932629903158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,16,1,128,131071,0.1917440096537272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,1,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,1,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,1,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,1,0.013408000270525614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,1,0.013909333695967993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,1,0.012794667234023413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,1,0.013514666507641474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,1,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,1,0.013248000293970108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,1,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,1,0.0143306665122509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,1,0.01331199953953425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,1,0.011658667276302973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,1,0.011488000551859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,1,0.011663999408483505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,3,0.014767999450365702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,3,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,3,0.013280000537633896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,3,0.013605333864688873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,3,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,3,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,3,0.01292266696691513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,3,0.013610667238632837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,3,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,3,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,3,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,3,0.011813333878914515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,3,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,3,0.011861333002646765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,3,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,3,0.011306667079528173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,7,0.013445333888133367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,7,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,7,0.013807999591032663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,7,0.01309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,7,0.012928000340859095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,7,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,7,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,7,0.013199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,7,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,7,0.012608000387748083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,7,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,7,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,7,0.011258666714032492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,7,0.012181332955757776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,7,0.012159999459981918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,7,0.012154666086037954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,15,0.013663999736309052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,15,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,15,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,15,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,15,0.012938667088747025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,15,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,15,0.013749333719412485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,15,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,15,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,15,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,15,0.01293333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,15,0.011621333658695221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,15,0.011061333119869232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,15,0.011605333536863327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,15,0.01098666712641716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,15,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,31,0.014138666292031607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,31,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,31,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,31,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,31,0.013381333400805792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,31,0.012608000387748083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,31,0.013061333447694778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,31,0.012698666503032049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,31,0.016602666427691776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,31,0.012906666845083237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,31,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,31,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,31,0.011221333096424738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,31,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,31,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,63,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,63,0.014181333283583323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,63,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,63,0.013861333330472311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,63,0.01341333364446958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,63,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,63,0.012890666723251343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,63,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,63,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,63,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,63,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,63,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,63,0.011007999380429586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,63,0.0116799995303154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,63,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,63,0.0116799995303154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,127,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,127,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,127,0.013306666165590286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,127,0.013530666629473368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,127,0.012586666891972223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,127,0.01312000056107839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,127,0.013002666334311167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,127,0.013818666338920593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,127,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,127,0.013829333086808523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,127,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,127,0.013274667163689932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,127,0.011098666737476984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,127,0.011338666081428528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,127,0.011039999624093374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,127,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,255,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,255,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,255,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,255,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,255,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,255,0.013317332913478216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,255,0.012853333105643591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,255,0.012997332960367203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,255,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,255,0.013151999562978745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,255,0.012863999853531519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,255,0.011834666132926941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,255,0.01108266661564509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,255,0.011130666981140772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,255,0.011343999455372492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,255,0.01163200040658315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,511,0.02186133215824763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,511,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,511,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,511,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,511,0.013258667041858038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,511,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,511,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,511,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,511,0.021935999393463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,511,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,511,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,511,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,511,0.013066666821638743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,511,0.013482666263977686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,511,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,511,0.012869333227475485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,1023,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,1023,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,1023,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,1023,0.01635733370979627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,1023,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,1023,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,1023,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,1023,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,1023,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,1023,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,1023,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,1023,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,1023,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,1023,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,1023,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,1023,0.013210666676362356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,2047,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,2047,0.028042666614055634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,2047,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,2047,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,2047,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,2047,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,2047,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,2047,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,2047,0.025765334566434223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,2047,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,2047,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,2047,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,2047,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,2047,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,2047,0.016645333419243496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,2047,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,4095,0.05073600014050802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,4095,0.05194666484991709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,4095,0.05530133346716563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,4095,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,4095,0.042725334564844765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,4095,0.04221866528193156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,4095,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,4095,0.04218666752179464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,4095,0.034048000971476235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,4095,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,4095,0.02979733298222224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,4095,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,4095,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,4095,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,4095,0.01995733380317688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,4095,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,8191,0.07718933125336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,8191,0.07898666461308797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,8191,0.099589337905248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,8191,0.07064533233642578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,8191,0.06900799771149953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,8191,0.07016000151634216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,8191,0.06828799843788147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,8191,0.06843199829260509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,8191,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,8191,0.049509331583976746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,8191,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,8191,0.03734933336575826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,8191,0.052426666021347046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,8191,0.037978666524092354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,8191,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,8191,0.03713600089152654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,16383,0.1304213305314382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,16383,0.13168000181516012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,16383,0.16492266456286112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,16383,0.12273066242535909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,16383,0.11954666177431743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,16383,0.12026666601498921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,16383,0.11831999818483989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,16383,0.0796319991350174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,16383,0.12043733398119609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,16383,0.07263466715812683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,16383,0.08567466338475545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,16383,0.06233599781990051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,16383,0.060975998640060425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,16383,0.060266668597857155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,16383,0.06003733476003011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,16383,0.060736000537872314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,32767,0.23609066009521484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,32767,0.23973333835601807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,32767,0.312336007754008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,32767,0.2283733288447062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,32767,0.2226933240890503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,32767,0.22310400009155273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,32767,0.2234613299369812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,32767,0.22300267219543457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,32767,0.13010666767756143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,32767,0.15465600291887918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,32767,0.1226026713848114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,32767,0.1072746713956197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,32767,0.10622400045394897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,32767,0.10689066847165425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,32767,0.1067626674969991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,32767,0.10572266578674316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,65535,0.44361599286397296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,65535,0.44942935307820636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,65535,0.6119626760482788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,65535,0.437173326810201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,65535,0.43241600195566815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,65535,0.4323466618855794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,65535,0.4289333422978719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,65535,0.43220798174540204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,65535,0.23497599363327026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,65535,0.21819732586542764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,65535,0.28886399666468304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,65535,0.19745065768559775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,65535,0.19647467136383057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,65535,0.19582400719324747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,65535,0.1959679921468099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,65535,0.19574934244155884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,32,1,1,131071,0.8618186314900717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,32,1,2,131071,0.8761386871337891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,32,1,4,131071,1.1948479811350505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,32,1,8,131071,0.8477973143259684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,32,1,16,131071,0.8399893442789713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,32,1,32,131071,0.8374933401743571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,32,1,128,131071,0.8389493624369303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,32,1,64,131071,0.840224027633667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,32,1,1,131071,0.44155200322469074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,32,1,2,131071,0.41025598843892414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,32,1,8,131071,0.3773333231608073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,32,1,4,131071,0.5604960123697916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,32,1,16,131071,0.37538135051727295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,32,1,32,131071,0.37439465522766113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,1,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,32,1,64,131071,0.37507200241088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,32,1,128,131071,0.3755520184834798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,1,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,1,0.014597332725922266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,1,0.013141332815090815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,1,0.013072000195582708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,1,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,1,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,1,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,1,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,1,0.013962666193644205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,1,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,1,0.012250666817029318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,1,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,1,0.011498666057984034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,1,0.011312000453472137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,1,0.010938666760921478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,3,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,3,0.013295999417702356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,3,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,3,0.012970666090647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,3,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,3,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,3,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,3,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,3,0.014138666292031607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,3,0.012896000097195307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,3,0.013877333452304205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,3,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,3,0.011274666835864386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,3,0.011109333485364914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,3,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,7,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,7,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,7,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,7,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,7,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,7,0.013536000003417334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,7,0.01351999988158544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,7,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,7,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,7,0.01854933301607768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,7,0.01339200014869372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,7,0.013007999708255133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,7,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,7,0.011776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,7,0.011839999506870905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,7,0.01369599997997284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,15,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,15,0.01303999995191892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,15,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,15,0.013738666971524557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,15,0.013327999661366144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,15,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,15,0.012901333471139273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,15,0.012730666746695837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,15,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,15,0.013093333691358566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,15,0.013045333325862885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,15,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,15,0.011296000331640244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,15,0.012506666282812754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,15,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,15,0.011418666690587997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,31,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,31,0.013440000514189402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,31,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,31,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,31,0.012960000584522883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,31,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,31,0.013770667215188345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,31,0.013503999759753546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,31,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,31,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,31,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,31,0.0129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,31,0.013013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,31,0.011322667201360067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,31,0.01267733300725619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,31,0.011941333611806234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,63,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,63,0.014346666634082794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,63,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,63,0.012986666212479273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,63,0.013167999684810638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,63,0.013669333110253016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,63,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,63,0.012965332716703415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,63,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,63,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,63,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,63,0.011247999966144562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,63,0.014208000153303146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,63,0.011066666493813196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,63,0.011328000575304031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,63,0.010922666639089584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,127,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,127,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,127,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,127,0.013450667262077332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,127,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,127,0.01333333303531011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,127,0.013130666067202887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,127,0.013525333255529404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,127,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,127,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,127,0.012879999975363413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,127,0.013749333719412485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,127,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,127,0.01239466667175293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,127,0.011407999942700068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,127,0.012736000120639801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,255,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,255,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,255,0.013642666240533194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,255,0.012949333836634954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,255,0.01302933320403099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,255,0.013077333569526672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,255,0.014250667144854864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,255,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,255,0.013370666652917862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,255,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,255,0.013365333278973898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,255,0.012527999778588613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,255,0.012634667257467905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,255,0.01129066695769628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,255,0.011610666910807291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,511,0.018687999496857326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,255,0.011760000139474869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,511,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,511,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,511,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,511,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,511,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,511,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,511,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,511,0.01970133309563001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,511,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,511,0.022677332162857056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,511,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,511,0.013855999956528345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,511,0.013610667238632837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,511,0.012784000486135483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,511,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,1023,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,1023,0.027109332382678986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,1023,0.030906667311986286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,1023,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,1023,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,1023,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,1023,0.020069333414236706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,1023,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,1023,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,1023,0.02588266630967458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,1023,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,1023,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,1023,0.016293333222468693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,1023,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,1023,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,1023,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,2047,0.04603200157483419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,2047,0.05301333467165629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,2047,0.0551146666208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,2047,0.044362664222717285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,2047,0.04358933369318644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,2047,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,2047,0.041749333341916404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,2047,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,2047,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,2047,0.031898667414983116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,2047,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,2047,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,2047,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,2047,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,2047,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,2047,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,4095,0.0711359977722168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,4095,0.08083733419577281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,4095,0.08870399991671245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,4095,0.07154133419195811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,4095,0.07049066821734111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,4095,0.06857599814732869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,4095,0.06843733290831248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,4095,0.06837333242098491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,4095,0.04771733283996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,4095,0.05152533451716105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,4095,0.049957334995269775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,4095,0.04028266668319702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,4095,0.0383840004603068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,4095,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,4095,0.03787733366092046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,4095,0.037834666669368744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,8191,0.12396799524625142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,8191,0.13545599579811096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,8191,0.16102932890256247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,8191,0.12315733234087627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,8191,0.1188800036907196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,8191,0.12054399649302165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,8191,0.11974933743476868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,8191,0.07472000022729237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,8191,0.11958932876586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,8191,0.0767680009206136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,8191,0.06298666695753734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,8191,0.06088533500830332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,8191,0.0839519997437795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,8191,0.06039999922116598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,8191,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,8191,0.05930666625499725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,16383,0.22883733113606772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,16383,0.2407253384590149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,16383,0.2288586695988973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,16383,0.30797332525253296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,16383,0.22510399421056113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,16383,0.22317866484324136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,16383,0.22272533178329468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,16383,0.2246133287747701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,16383,0.1267626682917277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,16383,0.1263040006160736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,16383,0.15224533279736838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,16383,0.10911466677983601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,16383,0.10725333293279012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,16383,0.10654933253924052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,16383,0.10648000240325928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,16383,0.10571199655532837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,32767,0.43957332770029706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,32767,0.4522293408711751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,32767,0.6005333264668783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,32767,0.43983999888102215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,32767,0.4315040111541748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,32767,0.4305386543273926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,32767,0.43004798889160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,32767,0.43040533860524494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,32767,0.22939733664194742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,32767,0.22075732549031576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,32767,0.19946134090423584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,32767,0.28861866394678753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,32767,0.1962933341662089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,32767,0.19539199272791544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,32767,0.19580799341201782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,32767,0.1960373322168986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,65535,0.8549226919809977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,65535,0.8738986651102701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,65535,0.8532959620157877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,65535,1.1859040260314941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,65535,0.841386636098226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,65535,0.841327985127767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,65535,0.8402613004048666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,65535,0.8380853335062662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,65535,0.4378559986750285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,65535,0.41410664717356366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,65535,0.37620798746744794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,65535,0.5611786842346191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,65535,0.37567468484242755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,65535,0.37569065888722736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,65535,0.3742080132166545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,65535,0.37380798657735187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,64,1,1,131071,1.6929386456807454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,64,1,2,131071,1.709722677866618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,64,1,8,131071,1.6678986549377441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,64,1,4,131071,2.3461546897888184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,64,1,16,131071,1.6431840260823567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,64,1,32,131071,1.646341323852539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,64,1,64,131071,1.6499999364217122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,64,1,128,131071,1.6501812934875488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,64,1,1,131071,0.8495786984761556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,64,1,2,131071,0.7986400127410889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,64,1,4,131071,1.1042346954345703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,64,1,8,131071,0.7348053455352783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,64,1,16,131071,0.7347040176391602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,64,1,32,131071,0.7355413436889648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,64,1,64,131071,0.73416535059611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,1,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,1,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,1,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,1,0.013733333597580591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,1,0.013717333475748697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,1,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,1,0.013536000003417334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,1,0.01422400027513504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,1,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,64,1,128,131071,0.7334986527760824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,1,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,1,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,1,0.013082666943470636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,1,0.013194666554530462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,1,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,1,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,1,0.013056000073750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,3,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,3,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,3,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,3,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,3,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,3,0.013178666432698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,3,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,3,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,3,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,3,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,3,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,3,0.013173333058754602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,3,0.013493333011865616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,3,0.013104000439246496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,3,0.013125333935022354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,3,0.012917333592971167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,7,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,7,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,7,0.014085333794355392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,7,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,7,0.013424000392357508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,7,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,7,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,7,0.012981332838535309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,7,0.023941333095232647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,7,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,7,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,7,0.01328533391157786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,7,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,7,0.01301866645614306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,7,0.013269333789745966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,7,0.011168000598748526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,15,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,15,0.01607999950647354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,15,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,15,0.019717333217461903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,15,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,15,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,15,0.014325333138306936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,15,0.013482666263977686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,15,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,15,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,15,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,15,0.013359999905029932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,15,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,15,0.013386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,15,0.012757333616415659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,15,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,31,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,31,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,31,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,31,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,31,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,31,0.013679999858140945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,31,0.013557333499193192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,31,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,31,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,31,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,31,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,31,0.013290667285521826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,31,0.013162666310866674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,31,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,31,0.012229333321253458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,63,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,31,0.013034666577974955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,63,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,63,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,63,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,63,0.013343999783198038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,63,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,63,0.013242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,63,0.013376000026861826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,63,0.023887999355793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,63,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,63,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,63,0.01321600005030632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,63,0.013253333667914072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,63,0.012773333738247553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,63,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,63,0.012885333349307379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,127,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,127,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,127,0.0229120006163915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,127,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,127,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,127,0.01470400020480156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,127,0.013434667140245438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,127,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,127,0.02496533344189326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,127,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,127,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,127,0.013183999806642532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,127,0.012954667210578918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,127,0.01314666618903478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,127,0.012944000462690989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,127,0.013338666409254074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,255,0.02197866638501485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,255,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,255,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,255,0.014069333672523499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,255,0.014186666657527288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,255,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,255,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,255,0.013829333086808523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,255,0.026746665438016255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,255,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,255,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,255,0.013050666699806849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,255,0.01322666679819425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,255,0.012768000364303589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,255,0.013088000317414602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,255,0.013114667187134424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,511,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,511,0.024058667321999867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,511,0.030117332935333252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,511,0.0179626668492953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,511,0.018325333793958027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,511,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,511,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,511,0.017781333376963932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,511,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,511,0.030293333033720653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,511,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,511,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,511,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,511,0.013189333180586496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,511,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,511,0.013882666826248169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,1023,0.050255998969078064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,1023,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,1023,0.04748799900213877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,1023,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,1023,0.041759997606277466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,1023,0.0403466671705246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,1023,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,1023,0.04139200101296107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,1023,0.040805332362651825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,1023,0.02661866694688797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,1023,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,1023,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,1023,0.01852799952030182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,1023,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,1023,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,1023,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,2047,0.07704000174999237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,2047,0.07455466687679291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,2047,0.09250666697820027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,2047,0.06937066713968913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,2047,0.06818666557470958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,2047,0.06701866785685222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,2047,0.06780266761779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,2047,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,2047,0.05573866764704386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,2047,0.04673066735267639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,2047,0.03773866593837738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,2047,0.051781331499417625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,2047,0.039493332306543984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,2047,0.03624533365170161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,2047,0.03586133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,2047,0.03626133253177007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,4095,0.12899200121561685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,4095,0.13121599952379862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,4095,0.16541866461435953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,4095,0.12053866187731425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,4095,0.11937600374221802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,4095,0.11738133430480957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,4095,0.11896000305811565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,4095,0.11780800422032674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,4095,0.08396266897519429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,4095,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,4095,0.08822932839393616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,4095,0.06140799820423126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,4095,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,4095,0.05901333192984263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,4095,0.059808000922203064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,4095,0.05907199780146281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,8191,0.23323200146357217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,8191,0.23561066389083862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,8191,0.3144586682319641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,8191,0.22588266928990683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,8191,0.22239466508229574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,8191,0.22277865807215372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,8191,0.22357332706451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,8191,0.22250133752822876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,8191,0.13693867127100626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,8191,0.12102400263150533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,8191,0.1557973325252533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,8191,0.10707199573516846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,8191,0.10582933823267619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,8191,0.10569600264231364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,8191,0.10587199529012044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,8191,0.10455999771753947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,16383,0.44432532787323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,16383,0.4491893450419108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,16383,0.6059786478678385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,16383,0.4309920072555542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,16383,0.43031466007232666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,16383,0.42712533473968506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,16383,0.42863468329111737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,16383,0.23989333709081015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,16383,0.42989333470662433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,16383,0.216538667678833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,16383,0.19732266664505005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,16383,0.29370667537053424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,16383,0.19604800144831339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,16383,0.19490132729212442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,16383,0.1945120096206665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,16383,0.19434134165445963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,32767,0.863813320795695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,32767,0.8681600093841553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,32767,0.8419946829477946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,32767,1.2372746467590332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,32767,0.8323787053426107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,32767,0.837498664855957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,32767,0.8362133502960205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,32767,0.8372853597005209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,32767,0.44764800866444904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,32767,0.4101066589355469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,32767,0.5645600159962972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,32767,0.37624533971150714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,32767,0.3747413158416748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,32767,0.3757813374201457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,32767,0.3744959831237793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,32767,0.3736586570739746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,65535,1.6949812571207683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,65535,1.6848586400349934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,65535,1.650368054707845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,65535,2.495306650797526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,65535,1.6387573877970378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,65535,1.6406985918680828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,65535,1.6354719797770183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,65535,1.6317973136901855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,65535,0.8632319768269857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,65535,0.7969919840494791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,65535,0.7344960371653239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,65535,1.1322826544443767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,65535,0.7332213719685873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,65535,0.7347306410471598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,65535,0.7341600259145101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,65535,0.7338773409525553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,128,1,1,131071,3.365989367167155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,128,1,2,131071,3.408320109049479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,128,1,8,131071,3.3127307891845703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,128,1,4,131071,4.985082626342773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,128,1,16,131071,3.308586756388346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,128,1,32,131071,3.3087145487467446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,128,1,64,131071,3.2951040267944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,128,1,128,131071,3.303167978922526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,128,1,1,131071,1.6953013737996419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,128,1,2,131071,1.5634667078653972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,128,1,4,131071,2.2852907180786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,128,1,8,131071,1.4494400024414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,128,1,16,131071,1.4512853622436523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,128,1,32,131071,1.4519093831380208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,1,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,1,0.02295999974012375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,1,0.02827200045188268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,1,0.02006400004029274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,1,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,1,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,128,1,64,131071,1.4455359776814778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,1,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,1,0.018816000471512478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,1,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,1,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,1,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,1,0.01764800027012825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,1,0.016127999871969223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,1,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,1,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,1,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,128,1,128,131071,1.4484319686889648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,3,0.027866666515668232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,3,0.022341333329677582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,3,0.027962667246659596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,3,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,3,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,3,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,3,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,3,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,3,0.04205866654713949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,3,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,3,0.025663999219735462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,3,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,3,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,3,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,3,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,3,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,7,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,7,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,7,0.028090665737787884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,7,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,7,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,7,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,7,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,7,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,7,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,7,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,7,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,7,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,7,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,7,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,7,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,7,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,15,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,15,0.02827200045188268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,15,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,15,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,15,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,15,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,15,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,15,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,15,0.041984001795450844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,15,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,15,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,15,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,15,0.016735999534527462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,15,0.015919999529918034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,15,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,15,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,31,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,31,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,31,0.02790933350721995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,31,0.01964266722400983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,31,0.019551999866962433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,31,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,31,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,31,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,31,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,31,0.04154133299986521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,31,0.025754667818546295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,31,0.018618666877349217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,31,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,31,0.01618133361140887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,31,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,31,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,63,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,63,0.027829334139823914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,63,0.028357334434986115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,63,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,63,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,63,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,63,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,63,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,63,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,63,0.025685332715511322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,63,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,63,0.01782400036851565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,63,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,63,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,63,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,63,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,127,0.03209600100914637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,127,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,127,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,127,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,127,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,127,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,127,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,127,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,127,0.04250133534272512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,127,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,127,0.026005332668622334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,127,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,127,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,127,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,127,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,127,0.015850666910409927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,255,0.04271999994913737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,255,0.02993600070476532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,255,0.032144000132878624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,255,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,255,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,255,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,255,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,255,0.04776533444722494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,255,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,255,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,255,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,255,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,255,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,255,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,255,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,255,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,511,0.05797866483529409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,511,0.05091199775536855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,511,0.05489066739877065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,511,0.04253333310286204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,511,0.044309332966804504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,511,0.04484800000985464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,511,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,511,0.041984001795450844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,511,0.03997333347797394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,511,0.055717334151268005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,511,0.03692266593376795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,511,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,511,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,511,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,511,0.019786667078733444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,511,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,1023,0.08628267049789429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,1023,0.09223999579747517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,1023,0.07922666768232982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,1023,0.06962133447329204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,1023,0.06777599950631459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,1023,0.06845333178838094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,1023,0.06806399921576183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,1023,0.07003733515739441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,1023,0.0688266654809316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,1023,0.05464000006516775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,1023,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,1023,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,1023,0.03782399992148081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,1023,0.037477334340413414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,1023,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,1023,0.0374293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,2047,0.13222933808962503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,2047,0.1393226683139801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,2047,0.16929600636164346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,2047,0.12277866403261821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,2047,0.12036800384521484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,2047,0.1202133297920227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,2047,0.1209440032641093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,2047,0.11945066849390666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,2047,0.08186666667461395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,2047,0.09083732962608337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,2047,0.09778666496276855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,2047,0.06572799881299336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,2047,0.061834668119748436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,2047,0.062447999914487205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,2047,0.061903998255729675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,2047,0.06078400214513143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,4095,0.24528533220291138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,4095,0.2384480039278666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,4095,0.22776534159978232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,4095,0.312991996606191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,4095,0.22386133670806885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,4095,0.22394132614135742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,4095,0.22202666600545248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,4095,0.223088006178538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,4095,0.15174399813016257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,4095,0.13154133160909018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,4095,0.10990400115648906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,4095,0.1612160007158915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,4095,0.10628267129262288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,4095,0.10549867153167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,4095,0.10608533024787903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,4095,0.10519466797510783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,8191,0.45632533232371014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,8191,0.449349323908488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,8191,0.4349706570307414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,8191,0.6058559815088908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,8191,0.4331573247909546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,8191,0.43423465887705487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,8191,0.43111467361450195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,8191,0.43078935146331787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,8191,0.2582239905993144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,8191,0.2283466657002767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,8191,0.29526400566101074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,8191,0.20173333088556925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,8191,0.19746132691701254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,8191,0.19788267215092978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,8191,0.19673067331314087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,8191,0.19697066148122153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,16383,0.8781173229217529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,16383,0.8710933526357015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,16383,1.2292799949645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,16383,0.8499840100606283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,16383,0.8444693088531494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,16383,0.8385547002156576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,16383,0.8434666792551676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,16383,0.8415626684824625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,16383,0.4669119914372762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,16383,0.42370665073394775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,16383,0.5650879939397176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,16383,0.38180267810821533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,16383,0.3789333502451579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,16383,0.3773546616236369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,16383,0.37886401017506915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,16383,0.37810667355855304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,32767,1.7286186218261719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,32767,1.7252747217814128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,32767,2.5545706748962402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,32767,1.6824960708618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,32767,1.6760320663452148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,32767,1.6745012601216633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,32767,1.6688639322916667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,32767,1.67905060450236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,32767,0.8871839841206869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,32767,0.8097919623057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,32767,0.7426559925079346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,32767,1.1235093275705974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,32767,0.7414506276448568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,32767,0.7404159704844157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,32767,0.7376853624979655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,32767,0.7370826403299967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,256,1,1,65535,3.4205118815104165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,256,1,2,65535,3.381061236063639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,256,1,8,65535,3.309525489807129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,256,1,4,65535,5.227434794108073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,256,1,16,65535,3.3016745249430337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,256,1,32,65535,3.2861814498901367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,256,1,64,65535,3.2945706049601235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,256,1,128,65535,3.30073610941569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,256,1,2,65535,1.583013375600179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,256,1,1,65535,1.7231787045796711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,256,1,8,65535,1.4616212844848633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,256,1,4,65535,2.329098701477051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,256,1,16,65535,1.4628267288208008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,256,1,32,65535,1.462949275970459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,1,0.035973332822322845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,1,0.048325334986050926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,1,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,1,0.04412800073623657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,256,1,64,65535,1.4628639221191406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,1,0.0277813325325648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,1,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,1,0.028650666276613872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,1,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,1,0.0749066670735677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,1,0.044922664761543274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,256,1,128,65535,1.4605120023091633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,1,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,1,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,1,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,1,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,1,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,1,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,3,0.05063466727733612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,3,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,3,0.043621331453323364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,3,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,3,0.027978666126728058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,3,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,3,0.02769600103298823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,3,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,3,0.07543466488520305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,3,0.045007998744646706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,3,0.04025600105524063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,3,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,3,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,3,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,3,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,3,0.021738665799299877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,7,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,7,0.04914666712284088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,7,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,7,0.027984000742435455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,7,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,7,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,7,0.027615999182065327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,7,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,7,0.07452799876530965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,7,0.043712000052134194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,7,0.04049066702524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,7,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,7,0.0223786657055219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,7,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,7,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,7,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,15,0.04818666477998098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,15,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,15,0.043749332427978516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,15,0.02805333336194356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,15,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,15,0.027024000883102417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,15,0.02738133321205775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,15,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,15,0.07471466561158498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,15,0.043968002001444496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,15,0.03997866561015447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,15,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,15,0.022458667556444805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,15,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,15,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,15,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,31,0.04901866614818573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,31,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,31,0.03558400024970373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,31,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,31,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,31,0.027829334139823914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,31,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,31,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,31,0.07442133128643036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,31,0.045610666275024414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,31,0.039621333281199135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,31,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,31,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,31,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,31,0.020917333662509918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,31,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,63,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,63,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,63,0.04398933549722036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,63,0.028207999964555103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,63,0.02775466690460841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,63,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,63,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,63,0.027797333896160126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,63,0.07448533177375793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,63,0.04538666705290476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,63,0.04002666721741358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,63,0.026021334032217663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,63,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,63,0.02176533391078313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,63,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,63,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,127,0.05242133140563965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,127,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,127,0.02994133283694585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,127,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,127,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,127,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,127,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,127,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,127,0.07526933153470357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,127,0.04630400240421295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,127,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,127,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,127,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,127,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,127,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,127,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,255,0.06716266771157582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,255,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,255,0.0561706672112147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,255,0.045935998360315956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,255,0.04307200014591217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,255,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,255,0.04227200150489807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,255,0.04124266654253006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,255,0.08155733346939087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,255,0.055498664577802025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,255,0.04458666841189066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,255,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,255,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,255,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,255,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,255,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,511,0.09083200494448344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,511,0.08905599514643352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,511,0.09067199627558391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,511,0.07446399827798207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,511,0.07080533107121785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,511,0.07202133536338806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,511,0.06903466582298279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,511,0.07190933326880138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,511,0.09168000022570293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,511,0.06780266761779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,511,0.06186666587988535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,511,0.04001600046952566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,511,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,511,0.03950933367013931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,511,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,511,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,1023,0.14268267154693604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,1023,0.14095999797185263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,1023,0.15609600146611533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,1023,0.12378666798273723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,1023,0.12592533230781555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,1023,0.1228000024954478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,1023,0.12148267030715942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,1023,0.11818666259447734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,1023,0.12091199556986491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,1023,0.09358400106430054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,1023,0.09286399682362874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,1023,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,1023,0.06218666831652323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,1023,0.06930133203665416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,1023,0.06297599772612254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,1023,0.061477333307266235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,2047,0.2435093323389689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,2047,0.2539359927177429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,2047,0.2908373276392619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,2047,0.23454399903615317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,2047,0.23517866929372153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,2047,0.22982933123906454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,2047,0.22899732987085977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,2047,0.2285333275794983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,2047,0.16911999384562174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,2047,0.14589866995811462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,2047,0.1567359964052836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,2047,0.11513066291809082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,2047,0.10993599891662598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,2047,0.10917333761850993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,2047,0.10817066828409831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,2047,0.10778666536013286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,4095,0.4342133204142253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,4095,0.4755680163701375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,4095,0.5500160058339437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,4095,0.4456160068511963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,4095,0.44328534603118896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,4095,0.4397653341293335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,4095,0.43936534722646076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,4095,0.43796801567077637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,4095,0.271232008934021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,4095,0.24542399247487387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,4095,0.27994134028752643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,4095,0.20848000049591064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,4095,0.20379199584325156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,4095,0.20090667406717935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,4095,0.19963200887044272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,4095,0.2015519936879476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,8191,0.8229813575744629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,8191,0.9111680189768473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,8191,1.083679993947347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,8191,0.8618293603261312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,8191,0.8666026592254639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,8191,0.8613333702087402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,8191,0.8526879946390787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,8191,0.8525226910909017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,8191,0.46139732996622723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,8191,0.4434346755345662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,8191,0.5200426578521729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,8191,0.39273067315419513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,8191,0.3883093198140462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,8191,0.38603734970092773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,8191,0.3841386636098226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,8191,0.38437334696451825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,16383,1.5962400436401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,16383,1.7702560424804688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,16383,1.69267733891805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,16383,2.244592030843099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,16383,1.6951200167338054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,16383,1.6843892733256023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,16383,1.6766773859659831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,16383,1.68614927927653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,16383,0.8428693612416586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,16383,0.8359946409861246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,16383,0.7608106931050619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,16383,1.0054240226745605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,16383,0.754634698232015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,16383,0.7528693675994873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,16383,0.749338706334432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,16383,0.7517920335133871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,512,1,1,32767,3.146208127339681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,512,1,8,32767,3.390453338623047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,512,1,2,32767,3.536992073059082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,512,1,4,32767,4.800378799438477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,512,1,16,32767,3.378863970438639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,512,1,32,32767,3.381253242492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,512,1,128,32767,3.3732000986735025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,512,1,64,32767,3.3814080556233725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,512,1,1,32767,1.6048000653584797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,512,1,2,32767,1.6215893427530925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,512,1,4,32767,2.0419519742329917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,512,1,8,32767,1.4957493146260579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,512,1,16,32767,1.4904853502909343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,1,0.08268266419569652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,512,1,32,32767,1.4862346649169922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,1,0.05894400179386139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,512,1,64,32767,1.485007921854655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,1,0.07815466821193695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,1,0.04790933430194855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,512,1,128,32767,1.4862933158874512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,1,0.04318400224049886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,1,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,1,0.04203199843565623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,1,0.04153066625197729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,1,0.13737066586812338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,1,0.0803413341442744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,1,0.07468800246715546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,1,0.041333332657814026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,1,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,1,0.033930666744709015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,1,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,1,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,3,0.081194669008255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,3,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,3,0.07880533238252004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,3,0.04970133304595947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,3,0.04310933252175649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,3,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,3,0.04287466903527578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,3,0.04154133299986521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,3,0.13807466626167297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,3,0.0790773332118988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,3,0.04116799930731455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,3,0.0748586654663086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,3,0.03390933324893316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,3,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,3,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,3,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,7,0.08088000118732452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,7,0.05771199862162272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,7,0.08073066671689351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,7,0.052522664268811546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,7,0.043418665726979576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,7,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,7,0.04167466859022776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,7,0.043040002385775246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,7,0.13876266280810037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,7,0.0802346666653951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,7,0.0738506664832433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,7,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,7,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,7,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,7,0.03179733455181122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,7,0.03166933357715607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,15,0.08080533146858215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,15,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,15,0.048026666045188904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,15,0.07829866806666057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,15,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,15,0.042394667863845825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,15,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,15,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,15,0.13802133003870645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,15,0.07876266539096832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,15,0.04065066576004028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,15,0.07381866872310638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,15,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,15,0.03169066707293192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,15,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,15,0.03126933425664902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,31,0.08065600196520488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,31,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,31,0.04896000027656555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,31,0.07979733248551686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,31,0.043280000487963356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,31,0.042863999803860985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,31,0.04214933514595032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,31,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,31,0.07900799810886383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,31,0.1374559998512268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,31,0.04144533226887385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,31,0.07472000022729237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,31,0.03349333256483078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,31,0.03195200115442276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,31,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,31,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,63,0.08145066599051158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,63,0.05841066439946493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,63,0.08016533156236012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,63,0.05007466673851013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,63,0.04418133199214935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,63,0.04187199970086416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,63,0.042064001162846885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,63,0.042208001017570496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,63,0.13798933227856955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,63,0.07866666714350383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,63,0.0740586668252945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,63,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,63,0.03365866591533025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,63,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,63,0.03202133377393087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,63,0.03183999905983607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,127,0.08694400389989217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,127,0.06758399804433186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,127,0.08415466547012329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,127,0.057775999108950295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,127,0.05422399938106537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,127,0.05417066812515259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,127,0.052469333012898765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,127,0.052517334620157875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,127,0.13806399703025818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,127,0.08201600114504497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,127,0.07512533167997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,127,0.045509333411852516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,127,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,127,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,127,0.03219199925661087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,127,0.031231999397277832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,255,0.1157973309357961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,255,0.0901759962240855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,255,0.09604799747467041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,255,0.0732426643371582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,255,0.07065066695213318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,255,0.06771199901898702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,255,0.06779199838638306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,255,0.06857066849867503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,255,0.14959466457366943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,255,0.09422399600346883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,255,0.07684266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,255,0.05083733300367991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,255,0.042634665966033936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,255,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,255,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,255,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,511,0.16379732886950174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,511,0.14229866862297058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,511,0.16381333271662393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,511,0.12744533022244772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,511,0.12130133310953777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,511,0.11858133474985759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,511,0.12077866991360982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,511,0.11680000027020772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,511,0.16914133230845133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,511,0.11370133360226949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,511,0.10783466696739197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,511,0.07264000177383423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,511,0.06587199866771698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,511,0.06270933151245117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,511,0.06151466568311056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,511,0.06131733457247416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,1023,0.2640426754951477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,1023,0.24495999018351236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,1023,0.2966720064481099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,1023,0.22090667486190796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,1023,0.2148053248723348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,1023,0.212826669216156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,1023,0.21237866083780924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,1023,0.21317867437998453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,1023,0.22073600689570108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,1023,0.16294399897257486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,1023,0.17089066902796426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,1023,0.10988799730936687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,1023,0.11834667126337688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,1023,0.10653866330782573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,1023,0.10541333754857381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,1023,0.10549333691596985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,2047,0.4650293191274007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,2047,0.45821865399678546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,2047,0.5622773170471191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,2047,0.4296746651331584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,2047,0.4219626585642497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,2047,0.41679465770721436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,2047,0.41881601015726727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,2047,0.41703466574350995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,2047,0.32055999835332233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,2047,0.2590826749801636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,2047,0.2961546579996745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,2047,0.2062079906463623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,2047,0.1973653237024943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,2047,0.19363733132680258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,2047,0.19003732999165854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,2047,0.18935465812683105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,4095,0.8530346552530924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,4095,0.8732960224151611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,4095,1.0803360144297283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,4095,0.8260052998860677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,4095,0.8169653415679932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,4095,0.8190826574961344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,4095,0.8119946320851644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,4095,0.8142240047454834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,4095,0.5226560036341349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,4095,0.4442880153656006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,4095,0.5423200130462646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,4095,0.37880531946818036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,4095,0.36771198113759357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,4095,0.36576000849405926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,4095,0.3625599940617879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,4095,0.3614933490753174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,8191,1.6259840329488118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,8191,1.6957866350809734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,8191,2.1444053649902344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,8191,1.616223971048991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,8191,1.5957387288411458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,8191,1.6165653864542644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,8191,1.6112853686014812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,8191,1.6067360242207844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,8191,0.9042186737060547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,8191,0.8072799841562907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,8191,1.0200213591257732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,8191,0.72215469678243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,8191,0.7132480144500732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,8191,0.7096373240152994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,8191,0.7061066627502441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,8191,0.7055359681447347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,128,1024,1,1,16383,3.1798505783081055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,64,1024,1,2,16383,3.3100694020589194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,32,1024,1,4,16383,4.511589368184407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,16,1024,1,8,16383,3.1888694763183594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,8,1024,1,16,16383,3.1694186528523765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,4,1024,1,32,16383,3.1930131912231445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,2,1024,1,64,16383,3.1880321502685547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,float16,1,1024,1,128,16383,3.1594934463500977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,128,1024,1,1,16383,1.6623786290486653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,64,1024,1,2,16383,1.5318986574808757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,32,1024,1,4,16383,1.991978645324707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,16,1024,1,8,16383,1.4100906054178874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,4,1024,1,32,16383,1.3937546412150066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,8,1024,1,16,16383,1.3986345926920574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,2,1024,1,64,16383,1.3900052706400554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_generation,default,float16,fp8,1,1024,1,128,16383,1.3897493680318196
