framework,version,device,op_name,kernel_source,m,k,quant_dtype,latency
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,12288,fp8,0.2040895938873291
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,10240,fp8,0.17160213788350423
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,16384,fp8,0.27095785140991213
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,8192,fp8,0.1377365271250407
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,7168,fp8,0.1209557294845581
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,6144,fp8,0.10423999627431232
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,3584,fp8,0.033920001983642575
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,4096,fp8,0.04967360099156697
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,5120,fp8,0.08726507027943929
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,3072,fp8,0.02813439965248108
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,2560,fp8,0.023915733893712363
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,2048,fp8,0.019780266284942626
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,1536,fp8,0.015287466843922935
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,1024,fp8,0.011190399527549744
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,768,fp8,0.00918933351834615
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,512,fp8,0.007102933526039123
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,256,fp8,0.005049600203831991
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,128,fp8,0.004232533276081085
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,64,fp8,0.0037610667447249093
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,32,fp8,0.0033098667860031127
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,12288,fp8,0.10440106391906738
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,16384,fp8,0.1377280076344808
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,10240,fp8,0.08782080014546713
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,8192,fp8,0.05407253503799438
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,6144,fp8,0.02834239999453227
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,7168,fp8,0.03251199920972188
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,5120,fp8,0.02349546750386556
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,4096,fp8,0.019429334004720054
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,3072,fp8,0.015708800156911215
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,3584,fp8,0.017682133118311565
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,2048,fp8,0.011161599556605022
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,2560,fp8,0.013286399841308593
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,1536,fp8,0.009529599547386169
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,1024,fp8,0.007397333284219106
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,768,fp8,0.006213333209355672
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,512,fp8,0.005054933329423269
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,256,fp8,0.0037248000502586366
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,128,fp8,0.003432533393303553
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,64,fp8,0.0029557332396507262
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,32,fp8,0.0025920001169045764
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,16384,fp8,0.053201067447662356
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,12288,fp8,0.028024532397588092
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,10240,fp8,0.02360426584879557
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,8192,fp8,0.01977919936180115
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,7168,fp8,0.01737706661224365
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,6144,fp8,0.015385599931081137
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,5120,fp8,0.013637333114941915
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,4096,fp8,0.011160533626874287
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,3584,fp8,0.010377599795659383
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,3072,fp8,0.009499733646710713
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,2560,fp8,0.008212266862392426
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,2048,fp8,0.007499733567237854
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,1536,fp8,0.005850666761398315
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,1024,fp8,0.005015466610590617
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,768,fp8,0.004966400067011515
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,512,fp8,0.0038133333126703895
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4096,65536,fp8,0.5389333089192708
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,256,fp8,0.0037205333511034647
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,128,fp8,0.003369600077470144
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8192,65536,fp8,1.0695968627929688
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,64,fp8,0.0029706666866938275
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,32,fp8,0.0025343999266624452
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,16384,fp8,0.01952426632245382
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,12288,fp8,0.015650133291880287
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,10240,fp8,0.013678933183352152
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,8192,fp8,0.011162666479746501
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,6144,fp8,0.009093333284060161
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,7168,fp8,0.01034986674785614
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,5120,fp8,0.008310399949550629
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,4096,fp8,0.007055999835332234
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,3584,fp8,0.006660266717274983
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,3072,fp8,0.00628053347269694
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,2560,fp8,0.005881600081920624
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,2048,fp8,0.005026133358478546
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,1024,fp8,0.004212266703446706
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,1536,fp8,0.004702933132648468
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,768,fp8,0.0042250668009122215
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,512,fp8,0.003368533402681351
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,256,fp8,0.0030218665798505146
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,128,fp8,0.002958933264017105
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2048,65536,fp8,0.27228374481201173
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,64,fp8,0.002979200085004171
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,32,fp8,0.0025472000241279604
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,12288,fp8,0.012412800391515096
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,16384,fp8,0.015341867009798685
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,8192,fp8,0.00914026697476705
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,10240,fp8,0.01118933359781901
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,7168,fp8,0.008775466680526733
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,6144,fp8,0.007935999830563863
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,5120,fp8,0.007478400071461995
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,4096,fp8,0.006231466432412466
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1024,65536,fp8,0.13873599370320638
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,3584,fp8,0.005852800110975901
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,3072,fp8,0.005459199845790863
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,2560,fp8,0.005022933085759481
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,1536,fp8,0.0038602667550245917
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,2048,fp8,0.004642133414745331
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,1024,fp8,0.003804799914360046
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,768,fp8,0.0037759999434153237
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,512,fp8,0.003425066669782003
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,256,fp8,0.003018666555484136
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,128,fp8,0.0025973332424958544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,64,fp8,0.002549333373705546
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,32,fp8,0.0025439999997615816
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,16384,fp8,0.011528533697128297
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,12288,fp8,0.00909440020720164
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,10240,fp8,0.008344533046086629
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,8192,fp8,0.007189333438873291
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,768,65536,fp8,0.10515093008677165
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,7168,fp8,0.006647466619809468
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,6144,fp8,0.006263466676076253
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,5120,fp8,0.005836800237496694
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,4096,fp8,0.005054933329423269
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,3584,fp8,0.005019733309745788
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,3072,fp8,0.004615466793378194
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,2560,fp8,0.004243200023969015
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,2048,fp8,0.004220800101757049
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,1536,fp8,0.004063999901215235
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,1024,fp8,0.003789866715669632
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,768,fp8,0.0034080001215140024
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,512,fp8,0.003013333429892858
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,256,fp8,0.0026133333643277483
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,128,fp8,0.002997333308060964
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,64,fp8,0.002573866645495097
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,32,fp8,0.002611200014750163
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,16384,fp8,0.009204266468683879
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,12288,fp8,0.00819413314263026
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,512,65536,fp8,0.0524288018544515
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,10240,fp8,0.007121066749095917
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,8192,fp8,0.00588266650835673
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,6144,fp8,0.005475200215975444
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,7168,fp8,0.0062720000743865965
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,5120,fp8,0.005015466610590617
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,4096,fp8,0.0050016000866889955
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,3584,fp8,0.004228266576925913
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,3072,fp8,0.0038719999293486277
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,2560,fp8,0.0037952000896135964
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,2048,fp8,0.004186666508515676
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,1536,fp8,0.0033781332274278007
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,1024,fp8,0.0033621333539485933
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,768,fp8,0.0030122667551040648
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,512,fp8,0.002959999938805898
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,256,fp8,0.002962133288383484
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,128,fp8,0.0029386666913827257
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,64,fp8,0.0025653332471847535
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,32,fp8,0.0025920001169045764
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,16384,fp8,0.007422933479150136
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,384,65536,fp8,0.027964800596237183
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,12288,fp8,0.006259199976921081
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,10240,fp8,0.005895466605822245
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,8192,fp8,0.005067733426888784
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,7168,fp8,0.005018666883309682
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,6144,fp8,0.004987733562787374
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,5120,fp8,0.004241066674391428
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,4096,fp8,0.0038165333370367683
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,3584,fp8,0.003841066608826319
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,3072,fp8,0.00382080003619194
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,2560,fp8,0.003790933390458425
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,2048,fp8,0.003824000060558319
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,1536,fp8,0.003373866776625315
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,1024,fp8,0.0029535998900731405
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,768,fp8,0.0030016000072161358
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,512,fp8,0.002997333308060964
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,256,fp8,0.002550400048494339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,128,fp8,0.0030250666042168934
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,64,fp8,0.0025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,32,fp8,0.002570666621128718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,256,65536,fp8,0.01977919936180115
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,16384,fp8,0.006259199976921081
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,12288,fp8,0.005406933526198069
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,10240,fp8,0.0050357331832249965
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,8192,fp8,0.0046741331617037455
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,7168,fp8,0.004271999994913737
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,6144,fp8,0.0042250668009122215
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,5120,fp8,0.003870933254559835
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,4096,fp8,0.0037546666959921518
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,3584,fp8,0.0038218667109807336
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,3072,fp8,0.0037813333173592886
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,2560,fp8,0.0034122665723164878
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,2048,fp8,0.003421866645415624
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,1536,fp8,0.0029728000362714132
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,1024,fp8,0.002959999938805898
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,768,fp8,0.002998399982849757
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,512,fp8,0.0029728000362714132
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,256,fp8,0.00258240004380544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,128,fp8,0.00262719988822937
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,64,fp8,0.002609066665172577
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,32,fp8,0.002535466601451238
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,192,65536,fp8,0.015706666310628257
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,16384,fp8,0.0057770664493242896
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,12288,fp8,0.005037866532802582
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,10240,fp8,0.005019733309745788
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,8192,fp8,0.004578133424123129
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,7168,fp8,0.0037770666182041167
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,6144,fp8,0.0037418665985266366
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,5120,fp8,0.0038880000511805216
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,3584,fp8,0.003769599894682566
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,4096,fp8,0.003458133339881897
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,3072,fp8,0.003368533402681351
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,2560,fp8,0.0034495999415715536
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,2048,fp8,0.0033642667035261786
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,1536,fp8,0.002948266764481862
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,1024,fp8,0.0026357332865397137
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,768,fp8,0.002958933264017105
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,512,fp8,0.0026079999903837843
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,256,fp8,0.002586666742960612
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,128,fp8,0.0026101333399613695
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,64,fp8,0.0025631998976071674
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,32,fp8,0.002587733417749405
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,160,65536,fp8,0.013226667046546936
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,16384,fp8,0.005006933212280273
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,12288,fp8,0.004730666677157084
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,10240,fp8,0.004220800101757049
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,8192,fp8,0.0038794666528701783
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,7168,fp8,0.003929600119590759
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,6144,fp8,0.00405973345041275
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,4096,fp8,0.0033749334514141084
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,5120,fp8,0.003977599988381068
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,3584,fp8,0.0033770665526390077
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,3072,fp8,0.003475199888149897
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,2560,fp8,0.003017599880695343
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,1536,fp8,0.002959999938805898
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,2048,fp8,0.0031338666876157125
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,1024,fp8,0.002994133283694585
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,768,fp8,0.0030858665704727173
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,512,fp8,0.002552533398071925
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,128,fp8,0.002603733291228612
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,256,fp8,0.0026986666023731233
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,64,fp8,0.002585600068171819
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,32,fp8,0.0026538667579491934
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,128,65536,fp8,0.011210667093594869
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,16384,fp8,0.005077333251635233
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,12288,fp8,0.0037941334148248037
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,10240,fp8,0.0038762666285037995
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,8192,fp8,0.00420053352912267
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,6144,fp8,0.0037845333417256674
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,7168,fp8,0.003931733220815659
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,5120,fp8,0.0033418667813142145
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,4096,fp8,0.0035103999078273775
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,3584,fp8,0.0033749334514141084
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,3072,fp8,0.003123199939727783
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,2560,fp8,0.002962133288383484
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,1536,fp8,0.0025685332715511323
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,2048,fp8,0.0030378667016824085
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,1024,fp8,0.002932266642649968
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,768,fp8,0.003010133405526479
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,512,fp8,0.002551466723283132
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,256,fp8,0.003019733230272929
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,128,fp8,0.0026186667382717133
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,32,fp8,0.002567466596762339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,64,fp8,0.002586666742960612
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,96,65536,fp8,0.009113599856694538
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,16384,fp8,0.004658133288224538
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,12288,fp8,0.0038399999340375268
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,10240,fp8,0.0038581334054470064
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,8192,fp8,0.003432533393303553
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,7168,fp8,0.0034453332424163817
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,6144,fp8,0.0034005333979924522
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,5120,fp8,0.0034954667091369627
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,4096,fp8,0.003369600077470144
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,3072,fp8,0.002974933385848999
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,3584,fp8,0.0030752000709374744
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,2560,fp8,0.003010133405526479
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,2048,fp8,0.0031061333914597826
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,1536,fp8,0.0029567999144395193
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,1024,fp8,0.003019733230272929
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,768,fp8,0.0025759999950726825
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,512,fp8,0.0026602665583292644
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,256,fp8,0.0026229334374268847
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,128,fp8,0.002921599894762039
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,64,fp8,0.002553600072860718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,32,fp8,0.002630399912595749
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,80,65536,fp8,0.008010666569073994
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,16384,fp8,0.0038773333032925926
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,12288,fp8,0.00412266676624616
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,10240,fp8,0.003914666672547659
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,8192,fp8,0.0033503999312718712
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,6144,fp8,0.003387733300526937
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,7168,fp8,0.0034677334129810332
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,5120,fp8,0.003352533280849457
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,4096,fp8,0.0030559999247392017
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,3584,fp8,0.0029450667401154833
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,3072,fp8,0.002977066735426585
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,2560,fp8,0.0029535998900731405
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,2048,fp8,0.0030229332546393077
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,1536,fp8,0.002624000112215678
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,1024,fp8,0.0026229334374268847
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,768,fp8,0.0025429333249727885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,512,fp8,0.00297173336148262
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,256,fp8,0.0026101333399613695
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,128,fp8,0.0029898665845394133
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,64,fp8,0.0025685332715511323
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,32,fp8,0.0026229334374268847
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,64,65536,fp8,0.00705813318490982
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,16384,fp8,0.00420053352912267
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,12288,fp8,0.0034005333979924522
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,8192,fp8,0.0033781332274278007
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,10240,fp8,0.0033546666304270422
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,7168,fp8,0.0030037333567937215
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,6144,fp8,0.0030026666820049284
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,5120,fp8,0.0033962666988372804
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,4096,fp8,0.0030346666773160298
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,3584,fp8,0.002625066787004471
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,3072,fp8,0.0029813334345817565
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,2560,fp8,0.0026357332865397137
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,2048,fp8,0.0025173333783944448
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,1024,fp8,0.002903466671705246
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,1536,fp8,0.002749866743882497
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,768,fp8,0.0025557334224383037
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,512,fp8,0.003035733352104823
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,256,fp8,0.0026079999903837843
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,64,fp8,0.0026208000878492994
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,128,fp8,0.002703999976317088
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,32,fp8,0.0025386666258176167
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,16384,fp8,0.0038293334345022834
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,48,65536,fp8,0.006046933432420095
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,12288,fp8,0.003369600077470144
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,10240,fp8,0.0031669333577156065
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,8192,fp8,0.002942933390537898
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,6144,fp8,0.0030378667016824085
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,7168,fp8,0.0034122665723164878
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,5120,fp8,0.00266239990790685
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,4096,fp8,0.0030975999931494398
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,3584,fp8,0.002976000060637792
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,3072,fp8,0.0027104000250498454
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,2560,fp8,0.0029504001140594482
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,1536,fp8,0.0025461333493391673
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,2048,fp8,0.0030080000559488933
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,1024,fp8,0.002571733295917511
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,768,fp8,0.0026538667579491934
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,512,fp8,0.0025631998976071674
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,128,fp8,0.0025429333249727885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,256,fp8,0.002639999985694885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,64,fp8,0.0025568000972270967
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,32,fp8,0.0027722666660944624
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,32,65536,fp8,0.005002666513125102
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,16384,fp8,0.003092266619205475
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,12288,fp8,0.0029525332152843474
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,10240,fp8,0.003058133274316788
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,8192,fp8,0.0029578665892283124
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,7168,fp8,0.0029685333371162414
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,6144,fp8,0.002550400048494339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,5120,fp8,0.0025685332715511323
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,4096,fp8,0.002570666621128718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,3072,fp8,0.0027093333502610523
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,3584,fp8,0.0029866665601730345
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,2048,fp8,0.002586666742960612
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,2560,fp8,0.0026186667382717133
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,1536,fp8,0.002587733417749405
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,1024,fp8,0.002569599946339925
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,512,fp8,0.002587733417749405
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,768,fp8,0.0027189334233601887
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,256,fp8,0.002526933451493581
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,128,fp8,0.0026015999416510267
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,64,fp8,0.0025610665480295816
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,32,fp8,0.002621866762638092
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,16384,fp8,0.0025578667720158894
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,16,65536,fp8,0.003920000046491623
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,12288,fp8,0.0029674666623274487
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,10240,fp8,0.002567466596762339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,8192,fp8,0.0026026666164398193
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,7168,fp8,0.0029813334345817565
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,5120,fp8,0.0026133333643277483
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,6144,fp8,0.003011200080315272
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,4096,fp8,0.0025600001215934755
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,3584,fp8,0.0029834667841593427
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,3072,fp8,0.002573866645495097
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,2560,fp8,0.0025621332228183747
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,2048,fp8,0.002552533398071925
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,1536,fp8,0.0027882667879263563
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,1024,fp8,0.0025642665723959604
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,768,fp8,0.002567466596762339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,512,fp8,0.002624000112215678
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,256,fp8,0.0025802666942278544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,128,fp8,0.0025568000972270967
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,64,fp8,0.0026464000344276427
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,32,fp8,0.002548266698916753
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,8,65536,fp8,0.003453866640726725
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,16384,fp8,0.0025621332228183747
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,10240,fp8,0.002619733413060506
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,12288,fp8,0.002959999938805898
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,7168,fp8,0.002509866654872894
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,6144,fp8,0.0028789333999156954
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,5120,fp8,0.002609066665172577
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,4096,fp8,0.002570666621128718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,3584,fp8,0.0029450667401154833
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,3072,fp8,0.0026101333399613695
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,2560,fp8,0.002639999985694885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,2048,fp8,0.0026154667139053345
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,8192,fp8,0.0025973332424958544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,1536,fp8,0.002535466601451238
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,1024,fp8,0.002619733413060506
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,768,fp8,0.002598399917284648
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,512,fp8,0.00262719988822937
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,256,fp8,0.0025653332471847535
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,128,fp8,0.002549333373705546
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,64,fp8,0.0025941332181294756
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,32,fp8,0.0025205334027608235
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,16384,fp8,0.002569599946339925
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,4,65536,fp8,0.003345066557327906
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,12288,fp8,0.002598399917284648
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,10240,fp8,0.0025343999266624452
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,8192,fp8,0.002643200010061264
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,7168,fp8,0.002829866607983907
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,6144,fp8,0.0026261332134405774
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,5120,fp8,0.0025727999707063037
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,4096,fp8,0.002628266563018163
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,3584,fp8,0.002553600072860718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,3072,fp8,0.002630399912595749
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,2560,fp8,0.00258240004380544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,2048,fp8,0.0025962665677070618
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,1536,fp8,0.00264533335963885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,1024,fp8,0.002639999985694885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,768,fp8,0.002550400048494339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,512,fp8,0.0025653332471847535
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,256,fp8,0.002566399921973546
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,128,fp8,0.0025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,64,fp8,0.002614400039116542
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,32,fp8,0.002648533384005229
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,2,65536,fp8,0.00264533335963885
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,16384,fp8,0.0025418666501839954
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,10240,fp8,0.0025802666942278544
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,12288,fp8,0.0029386666913827257
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,8192,fp8,0.002567466596762339
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,7168,fp8,0.002629333237806956
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,6144,fp8,0.0025450666745503742
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,5120,fp8,0.0026015999416510267
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,3584,fp8,0.002598399917284648
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,4096,fp8,0.0025248001019159954
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,3072,fp8,0.0025461333493391673
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,2560,fp8,0.002625066787004471
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,2048,fp8,0.0026101333399613695
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,1536,fp8,0.0026346666117509207
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,1024,fp8,0.002570666621128718
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,768,fp8,0.0025610665480295816
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,512,fp8,0.002584533393383026
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,256,fp8,0.0025888000925381976
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,128,fp8,0.0025546667476495106
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,64,fp8,0.00257493332028389
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,32,fp8,0.002600533266862233
TRTLLM,1.1.0,NVIDIA GB200,scale_matrix,torch_ops,1,65536,fp8,0.0029290666182835894
