framework,version,device,op_name,kernel_source,m,k,quant_dtype,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,12288,fp8,0.19872852961222331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,16384,fp8,0.26382400194803873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,10240,fp8,0.16643733978271485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,8192,fp8,0.13389973640441893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,5120,fp8,0.08495466709136963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,6144,fp8,0.10143786271413166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,7168,fp8,0.11779839992523193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,4096,fp8,0.04934613307317098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,3072,fp8,0.026792534192403156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,3584,fp8,0.03253759940465291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,2048,fp8,0.018962132930755615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,1536,fp8,0.014947199821472168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,2560,fp8,0.022719999154408775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,1024,fp8,0.010811733206113179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,512,fp8,0.007090133428573608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,768,fp8,0.008784000078837078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,256,fp8,0.005009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,128,fp8,0.0038005332152048744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,64,fp8,0.003442133218050003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,32,fp8,0.0029887999097506206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,12288,fp8,0.10165759722391765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,16384,fp8,0.13412373860677082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,8192,fp8,0.05201493501663208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,10240,fp8,0.08543679714202881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,7168,fp8,0.036611199378967285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,6144,fp8,0.026764800151189167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,4096,fp8,0.018702934185663857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,5120,fp8,0.022689066330591836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,3072,fp8,0.014909866452217101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,3584,fp8,0.01689280072848002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,2048,fp8,0.010763733585675558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,2560,fp8,0.01325440009435018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,1024,fp8,0.006706133484840393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,1536,fp8,0.009141332904497783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,512,fp8,0.005003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,768,fp8,0.005890133480230967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,256,fp8,0.0037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,128,fp8,0.003437866767247518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,64,fp8,0.0029738667110602063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,32,fp8,0.0025642665723959604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,16384,fp8,0.05036906798680624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,12288,fp8,0.02690133253733317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,10240,fp8,0.02300693392753601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8192,65536,fp8,1.0309354782104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,8192,fp8,0.01867093245188395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,7168,fp8,0.01700906753540039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,5120,fp8,0.012837333480517068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,6144,fp8,0.014935466647148132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4096,65536,fp8,0.5208330790201823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,4096,fp8,0.010785067081451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,3584,fp8,0.009985066453615824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,3072,fp8,0.009129599730173747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,2560,fp8,0.00796693315108617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,2048,fp8,0.0066890666882197065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,1536,fp8,0.005827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,512,fp8,0.0038389332592487337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,768,fp8,0.004666666686534882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,1024,fp8,0.004680533210436503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,128,fp8,0.002980266759792964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,64,fp8,0.002993066608905792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,256,fp8,0.003389866650104523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,32,fp8,0.0030602666238943735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,16384,fp8,0.01881813406944275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,12288,fp8,0.015068800250689188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,10240,fp8,0.013212800025939941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,8192,fp8,0.01086293359597524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,7168,fp8,0.009914666414260864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,6144,fp8,0.009152000149091084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,4096,fp8,0.007132799923419952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,3584,fp8,0.006631466746330261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,3072,fp8,0.005890133480230967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,2560,fp8,0.005459199845790863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,2048,fp8,0.004686933259169261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,1536,fp8,0.004667733112970988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,1024,fp8,0.0038421332836151125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,768,fp8,0.0037845333417256674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,512,fp8,0.0034005333979924522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,256,fp8,0.00306986669699351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,128,fp8,0.0025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2048,65536,fp8,0.2630751927693685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,5120,fp8,0.007778133451938629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,64,fp8,0.002647466709216436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,32,fp8,0.0026154667139053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,12288,fp8,0.01186240017414093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,10240,fp8,0.01060693363348643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,16384,fp8,0.014839466412862143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,8192,fp8,0.008739200234413148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,7168,fp8,0.008312533299128216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,6144,fp8,0.007929599781831106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,5120,fp8,0.007117866476376851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,4096,fp8,0.005829333265622457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,3584,fp8,0.005599999924500784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1024,65536,fp8,0.1336458683013916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,3072,fp8,0.005054933329423269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,2560,fp8,0.004647466540336609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,2048,fp8,0.004576000074545542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,1536,fp8,0.003803733239571253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,768,fp8,0.0034485332667827605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,1024,fp8,0.003835733234882355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,512,fp8,0.0033962666988372804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,256,fp8,0.0029898665845394133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,128,fp8,0.0025888000925381976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,64,fp8,0.0025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,32,fp8,0.0025759999950726825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,12288,fp8,0.00872213343779246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,16384,fp8,0.010874666770299276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,8192,fp8,0.007066666583220164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,10240,fp8,0.007941333452860515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,7168,fp8,0.006677333513895671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,6144,fp8,0.005860266586144766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,768,65536,fp8,0.10153706868489583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,5120,fp8,0.005445333321889242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,4096,fp8,0.004646400113900503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,3584,fp8,0.005067733426888784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,3072,fp8,0.004638933142026265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,2560,fp8,0.0042463997999827065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,2048,fp8,0.0037823999921480812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,1536,fp8,0.0038143999874591826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,1024,fp8,0.0033632000287373864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,768,fp8,0.0033919999996821085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,512,fp8,0.0030410667260487873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,256,fp8,0.0029237332443396253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,128,fp8,0.002569599946339925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,64,fp8,0.0026528000831604003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,32,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,12288,fp8,0.007533866663773854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,16384,fp8,0.008728532989819845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,10240,fp8,0.007146666447321574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,8192,fp8,0.005854933460553487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,512,65536,fp8,0.050621867179870605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,7168,fp8,0.005464533468087515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,5120,fp8,0.0050335998336474095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,6144,fp8,0.005448533097902933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,4096,fp8,0.004593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,3584,fp8,0.004192000130812327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,3072,fp8,0.0038730666041374207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,2560,fp8,0.0037834666669368743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,2048,fp8,0.003807999938726425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,1536,fp8,0.0033962666988372804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,1024,fp8,0.003028266628583272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,768,fp8,0.002977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,512,fp8,0.0030645333230495454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,256,fp8,0.0029397333661715193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,128,fp8,0.0025685332715511323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,64,fp8,0.0025429333249727885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,32,fp8,0.002605866640806198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,16384,fp8,0.006761600077152252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,384,65536,fp8,0.0268010675907135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,12288,fp8,0.005859200159708659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,10240,fp8,0.005871999760468801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,8192,fp8,0.004678399860858917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,7168,fp8,0.005051733553409576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,6144,fp8,0.0042805333932240805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,5120,fp8,0.004242133100827535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,4096,fp8,0.003735466549793879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,3584,fp8,0.0038453333079814913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,3072,fp8,0.004048000027736028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,2560,fp8,0.0034442665676275887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,2048,fp8,0.003402666747570038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,1536,fp8,0.003385599950949351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,1024,fp8,0.0030229332546393077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,768,fp8,0.002585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,512,fp8,0.0029493334392706556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,256,fp8,0.0025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,128,fp8,0.002625066787004471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,64,fp8,0.0025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,32,fp8,0.0025343999266624452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,16384,fp8,0.005967999994754791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,12288,fp8,0.005468800167242686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,256,65536,fp8,0.01876373291015625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,10240,fp8,0.004631466666857402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,8192,fp8,0.004582400123278299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,7168,fp8,0.004182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,6144,fp8,0.003772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,5120,fp8,0.003833599885304769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,4096,fp8,0.0038463999827702843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,3584,fp8,0.003798400113979975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,3072,fp8,0.0033717334270477297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,2560,fp8,0.003366400053103765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,2048,fp8,0.0029728000362714132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,1536,fp8,0.002979200085004171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,1024,fp8,0.002996266633272171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,768,fp8,0.0029578665892283124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,512,fp8,0.002755200117826462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,256,fp8,0.0026357332865397137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,128,fp8,0.0025759999950726825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,64,fp8,0.002586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,32,fp8,0.0025941332181294756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,16384,fp8,0.005463466544946035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,192,65536,fp8,0.014838400483131408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,12288,fp8,0.005029333134492239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,10240,fp8,0.004626133541266123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,8192,fp8,0.004204800228277842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,7168,fp8,0.003885866701602936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,5120,fp8,0.0037802666425704955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,6144,fp8,0.003852800031503042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,4096,fp8,0.00342399999499321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,3584,fp8,0.003432533393303553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,3072,fp8,0.0033973333736260734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,2560,fp8,0.0037920000652472176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,2048,fp8,0.002962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,1536,fp8,0.0030048000315825146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,1024,fp8,0.0029738667110602063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,768,fp8,0.0029834667841593427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,512,fp8,0.0025973332424958544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,256,fp8,0.0025429333249727885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,128,fp8,0.0025770666698614756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,64,fp8,0.0026229334374268847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,32,fp8,0.0025568000972270967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,160,65536,fp8,0.012831999858220419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,16384,fp8,0.004649599889914194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,12288,fp8,0.004640000065167745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,10240,fp8,0.004241066674391428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,8192,fp8,0.003881600002447764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,7168,fp8,0.003786666691303253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,6144,fp8,0.003719466676314672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,5120,fp8,0.003357866654793421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,4096,fp8,0.00342399999499321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,3584,fp8,0.0033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,3072,fp8,0.003387733300526937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,2560,fp8,0.003019733230272929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,2048,fp8,0.0029877332349618276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,1536,fp8,0.0026229334374268847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,1024,fp8,0.002609066665172577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,768,fp8,0.0026176000634829206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,512,fp8,0.002586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,256,fp8,0.0025685332715511323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,128,fp8,0.0025962665677070618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,64,fp8,0.0025685332715511323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,32,fp8,0.002535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,128,65536,fp8,0.010782933235168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,16384,fp8,0.0046186665693918865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,12288,fp8,0.0038624001046021783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,10240,fp8,0.003446399917205175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,8192,fp8,0.0038058665891488397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,7168,fp8,0.0034272000193595886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,6144,fp8,0.0034175999462604523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,5120,fp8,0.0034730667869249977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,4096,fp8,0.0034058667719364167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,3584,fp8,0.0029824001093705496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,3072,fp8,0.002976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,2560,fp8,0.002997333308060964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,2048,fp8,0.0029824001093705496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,1536,fp8,0.0030080000559488933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,1024,fp8,0.0025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,768,fp8,0.0026101333399613695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,512,fp8,0.00257493332028389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,256,fp8,0.0025898667673269907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,128,fp8,0.00262719988822937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,64,fp8,0.002600533266862233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,32,fp8,0.002629333237806956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,96,65536,fp8,0.008713600039482117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,16384,fp8,0.00417493333419164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,10240,fp8,0.0038101332883040107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,12288,fp8,0.0038431999584039056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,8192,fp8,0.003786666691303253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,7168,fp8,0.0033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,6144,fp8,0.0034773332377274835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,5120,fp8,0.0034783999125162757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,4096,fp8,0.002979200085004171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,3584,fp8,0.0029781334102153777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,3072,fp8,0.0029738667110602063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,2560,fp8,0.0029834667841593427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,2048,fp8,0.002998399982849757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,1536,fp8,0.002566399921973546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,1024,fp8,0.0025898667673269907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,768,fp8,0.00257493332028389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,512,fp8,0.00262719988822937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,256,fp8,0.002549333373705546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,128,fp8,0.002595199892918269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,64,fp8,0.00257493332028389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,32,fp8,0.0026335999369621276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,16384,fp8,0.003870933254559835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,80,65536,fp8,0.007921066880226136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,12288,fp8,0.003772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,10240,fp8,0.003732266773780187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,8192,fp8,0.0034261333445707956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,7168,fp8,0.0033834666013717652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,6144,fp8,0.0030720000465710956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,5120,fp8,0.0030965333183606463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,4096,fp8,0.0030410667260487873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,3584,fp8,0.003010133405526479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,3072,fp8,0.002603733291228612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,2560,fp8,0.0025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,2048,fp8,0.0026015999416510267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,1536,fp8,0.0029365333418051405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,1024,fp8,0.0026122666895389557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,768,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,512,fp8,0.002584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,256,fp8,0.0025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,128,fp8,0.0026186667382717133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,64,fp8,0.002593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,32,fp8,0.002569599946339925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,64,65536,fp8,0.00674773355325063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,16384,fp8,0.0038773333032925926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,12288,fp8,0.003446399917205175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,10240,fp8,0.0034656000634034475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,8192,fp8,0.0030026666820049284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,7168,fp8,0.0029674666623274487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,6144,fp8,0.003335466732581457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,5120,fp8,0.0029919999341169994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,4096,fp8,0.003013333429892858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,3072,fp8,0.0029824001093705496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,3584,fp8,0.002631466587384542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,2560,fp8,0.002548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,2048,fp8,0.0026101333399613695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,1536,fp8,0.0025461333493391673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,1024,fp8,0.0025898667673269907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,768,fp8,0.002584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,512,fp8,0.0025568000972270967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,256,fp8,0.0025898667673269907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,128,fp8,0.0026357332865397137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,64,fp8,0.002381866673628489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,32,fp8,0.0025994665920734406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,16384,fp8,0.00340693344672521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,48,65536,fp8,0.005871999760468801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,12288,fp8,0.0030218665798505146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,10240,fp8,0.003009066730737686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,8192,fp8,0.0030016000072161358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,7168,fp8,0.0029525332152843474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,6144,fp8,0.0029887999097506206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,5120,fp8,0.002974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,4096,fp8,0.002570666621128718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,3584,fp8,0.002552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,3072,fp8,0.002998399982849757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,2560,fp8,0.002644266684850057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,2048,fp8,0.002600533266862233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,1536,fp8,0.0026421333352724713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,1024,fp8,0.002600533266862233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,768,fp8,0.0025685332715511323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,256,fp8,0.002605866640806198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,512,fp8,0.002598399917284648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,128,fp8,0.002573866645495097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,64,fp8,0.0025600001215934755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,32,fp8,0.0022282667458057405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,32,65536,fp8,0.005027199784914652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,16384,fp8,0.0030389333764712016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,12288,fp8,0.003058133274316788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,10240,fp8,0.0025888000925381976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,8192,fp8,0.002605866640806198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,7168,fp8,0.002566399921973546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,6144,fp8,0.0029834667841593427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,5120,fp8,0.00262719988822937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,4096,fp8,0.0025888000925381976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,3584,fp8,0.0025802666942278544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,3072,fp8,0.0025770666698614756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,2560,fp8,0.002625066787004471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,2048,fp8,0.0026154667139053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,1536,fp8,0.002548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,1024,fp8,0.002611200014750163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,768,fp8,0.0025653332471847535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,512,fp8,0.0025792000194390613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,256,fp8,0.0025909334421157838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,128,fp8,0.002525866776704788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,32,fp8,0.002194133400917053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,64,fp8,0.0027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,16,65536,fp8,0.0038602667550245917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,16384,fp8,0.002976000060637792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,10240,fp8,0.0025920001169045764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,8192,fp8,0.002632533262173335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,12288,fp8,0.0027797333896160126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,7168,fp8,0.0026528000831604003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,6144,fp8,0.0026026666164398193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,5120,fp8,0.0027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,4096,fp8,0.00262719988822937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,3584,fp8,0.0025962665677070618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,2560,fp8,0.0025002665817737578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,2048,fp8,0.002585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,3072,fp8,0.0026869334280490874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,1536,fp8,0.0026047999660174055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,1024,fp8,0.0025461333493391673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,768,fp8,0.0026954665780067446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,512,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,256,fp8,0.0025727999707063037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,64,fp8,0.002569599946339925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,32,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,128,fp8,0.002700799951950709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,8,65536,fp8,0.003443199892838796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,16384,fp8,0.0025973332424958544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,12288,fp8,0.0027200000981489818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,10240,fp8,0.0025888000925381976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,8192,fp8,0.0025610665480295816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,6144,fp8,0.002625066787004471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,5120,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,7168,fp8,0.0027477333943049112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,4096,fp8,0.002619733413060506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,3584,fp8,0.002573866645495097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,3072,fp8,0.002696533252795537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,2560,fp8,0.0025642665723959604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,2048,fp8,0.002611200014750163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,1024,fp8,0.0025792000194390613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,768,fp8,0.0025610665480295816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,1536,fp8,0.002598399917284648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,512,fp8,0.002584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,256,fp8,0.002205866575241089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,128,fp8,0.0025759999950726825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,64,fp8,0.002532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,32,fp8,0.0026154667139053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,4,65536,fp8,0.0030261332790056865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,16384,fp8,0.0025994665920734406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,12288,fp8,0.002587733417749405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,10240,fp8,0.0025642665723959604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,8192,fp8,0.0025642665723959604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,7168,fp8,0.0026709333062171934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,6144,fp8,0.002552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,5120,fp8,0.0026101333399613695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,4096,fp8,0.0025034666061401365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,3584,fp8,0.0025909334421157838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,3072,fp8,0.00257493332028389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,2560,fp8,0.0025888000925381976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,2048,fp8,0.002550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,1536,fp8,0.002587733417749405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,1024,fp8,0.002584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,768,fp8,0.002593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,512,fp8,0.0025759999950726825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,256,fp8,0.0026122666895389557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,128,fp8,0.002219733347495397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,64,fp8,0.00257493332028389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,32,fp8,0.0022229333718617756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,2,65536,fp8,0.002994133283694585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,16384,fp8,0.0025610665480295816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,12288,fp8,0.0026410666604836782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,10240,fp8,0.002571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,8192,fp8,0.0025727999707063037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,7168,fp8,0.0025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,6144,fp8,0.002611200014750163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,5120,fp8,0.002621866762638092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,4096,fp8,0.0026154667139053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,3584,fp8,0.002549333373705546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,3072,fp8,0.00264533335963885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,2560,fp8,0.0025589334468046824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,2048,fp8,0.0025759999950726825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,1536,fp8,0.0021589333812395734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,1024,fp8,0.0025813333690166474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,768,fp8,0.002243199944496155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,512,fp8,0.0026389333109060925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,256,fp8,0.0025909334421157838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,128,fp8,0.0025653332471847535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,64,fp8,0.0021738665799299877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,32,fp8,0.0025920001169045764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,scale_matrix,torch_ops,1,65536,fp8,0.0025727999707063037
