framework,version,device,op_name,kernel_source,m,k,quant_dtype,latency
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,12288,fp8,0.06796908378601074
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,10240,fp8,0.05699733098347981
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,16384,fp8,0.08938134511311846
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,8192,fp8,0.044503466288248694
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,7168,fp8,0.038472533226013184
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,6144,fp8,0.029549876848856627
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,3584,fp8,0.02645013332366944
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,4096,fp8,0.025465595722198496
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,5120,fp8,0.018927995363871256
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,3072,fp8,0.019402668873469038
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,2560,fp8,0.0158133327960968
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,2048,fp8,0.015037866433461503
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,1536,fp8,0.013559466600418086
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,1024,fp8,0.013227733969688415
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,768,fp8,0.01346026659011841
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,512,fp8,0.011428265770276388
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,256,fp8,0.01154026687145233
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,128,fp8,0.010397866864999137
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,64,fp8,0.00883839949965477
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,32,fp8,0.009720533092816671
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,12288,fp8,0.03093439737955729
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,16384,fp8,0.0452202637990316
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,10240,fp8,0.01747626463572184
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,8192,fp8,0.02069973548253378
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,6144,fp8,0.01907520095507303
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,7168,fp8,0.027365332841873174
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,5120,fp8,0.01583146651585897
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,4096,fp8,0.015528531869252526
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,3072,fp8,0.013341865936915078
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,3584,fp8,0.01489813327789307
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,2048,fp8,0.013431467612584432
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,2560,fp8,0.013954132795333862
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,1536,fp8,0.011672533551851908
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,1024,fp8,0.01114133248726527
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,768,fp8,0.012667733430862426
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,512,fp8,0.011681067446867624
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,256,fp8,0.011794133484363556
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,128,fp8,0.010116266955931982
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,64,fp8,0.00992426723241806
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,32,fp8,0.009902933488289516
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,16384,fp8,0.021959467728932702
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,12288,fp8,0.01889706651369731
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,10240,fp8,0.01672640244166056
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,8192,fp8,0.015129599968592325
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,7168,fp8,0.016019201278686526
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,6144,fp8,0.013855999708175656
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,5120,fp8,0.013635199268658957
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,4096,fp8,0.013382400075594584
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,3584,fp8,0.013625599940617877
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,3072,fp8,0.013082666198412578
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,2560,fp8,0.012557865679264068
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,2048,fp8,0.011190399527549745
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,1536,fp8,0.013557332754135133
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,1024,fp8,0.012008533875147505
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,768,fp8,0.010840533177057904
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,512,fp8,0.011130666732788085
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4096,65536,fp8,0.167845344543457
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,256,fp8,0.009484800199667614
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,128,fp8,0.00954666684071223
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8192,65536,fp8,0.3265898386637369
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,64,fp8,0.010040533542633056
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,32,fp8,0.008739199737707773
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,16384,fp8,0.015434668461481726
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,12288,fp8,0.013196800152460733
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,10240,fp8,0.01359359920024872
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,8192,fp8,0.013561600446701048
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,6144,fp8,0.013262932499249777
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,7168,fp8,0.01442666749159495
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,5120,fp8,0.012012799084186555
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,4096,fp8,0.013198933998743695
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,3584,fp8,0.012068265676498414
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,3072,fp8,0.012309332688649496
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,2560,fp8,0.012189866602420808
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,2048,fp8,0.011549866696198783
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,1024,fp8,0.01035413295030594
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,1536,fp8,0.010503466427326202
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,768,fp8,0.010198399921258289
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,512,fp8,0.009650133301814398
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,256,fp8,0.009440000355243682
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,128,fp8,0.008833066870768864
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2048,65536,fp8,0.09060799280802406
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,64,fp8,0.008059733112653098
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,32,fp8,0.010745600362618766
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,12288,fp8,0.01289066672325134
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,16384,fp8,0.01373226543267568
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,8192,fp8,0.01354773243268331
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,10240,fp8,0.012523732582728068
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,7168,fp8,0.012055466572443644
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,6144,fp8,0.012155732512474062
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,5120,fp8,0.011461333433787028
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,4096,fp8,0.012500266730785373
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1024,65536,fp8,0.045371739069620776
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,3584,fp8,0.012213333944479626
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,3072,fp8,0.011565867563088736
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,2560,fp8,0.011416533589363098
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,1536,fp8,0.010577066987752914
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,2048,fp8,0.011906133592128752
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,1024,fp8,0.011252267162005107
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,768,fp8,0.009516800443331402
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,512,fp8,0.009894399841626484
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,256,fp8,0.010224000364542008
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,128,fp8,0.009797333429257076
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,64,fp8,0.009442133704821267
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,32,fp8,0.007915733009576799
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,16384,fp8,0.011695998907089233
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,12288,fp8,0.013324800133705138
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,10240,fp8,0.011741866668065388
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,8192,fp8,0.01122986674308777
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,768,65536,fp8,0.030334941546122235
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,7168,fp8,0.011911466717720032
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,6144,fp8,0.012270933389663698
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,5120,fp8,0.011476266880830128
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,4096,fp8,0.012606934209664663
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,3584,fp8,0.011450667182604473
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,3072,fp8,0.01242453356583913
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,2560,fp8,0.01081706682840983
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,2048,fp8,0.01121279944976171
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,1536,fp8,0.009046400338411332
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,1024,fp8,0.00972373311718305
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,768,fp8,0.00901760036746661
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,512,fp8,0.009550933291514714
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,256,fp8,0.009550933539867402
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,128,fp8,0.008499199648698172
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,64,fp8,0.009894399841626485
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,32,fp8,0.009971200426419576
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,16384,fp8,0.01353386640548706
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,12288,fp8,0.011318400998910268
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,512,65536,fp8,0.022677334149678548
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,10240,fp8,0.012575999399026237
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,8192,fp8,0.012800000111262005
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,6144,fp8,0.011085866888364155
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,7168,fp8,0.01230079929033915
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,5120,fp8,0.011336533228556316
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,4096,fp8,0.011586133142312369
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,3584,fp8,0.01082666665315628
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,3072,fp8,0.011061333864927293
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,2560,fp8,0.01084693322579066
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,2048,fp8,0.010259200632572175
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,1536,fp8,0.009566933661699296
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,1024,fp8,0.010981333007415137
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,768,fp8,0.010006399949391682
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,512,fp8,0.009790933628877004
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,256,fp8,0.010700800021489461
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,128,fp8,0.008446933080752693
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,64,fp8,0.007976532975832623
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,32,fp8,0.008825599402189254
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,16384,fp8,0.011053866644700368
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,384,65536,fp8,0.019296000401179
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,12288,fp8,0.012305065989494324
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,10240,fp8,0.011832534273465475
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,8192,fp8,0.011466666559378306
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,7168,fp8,0.012222932279109954
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,6144,fp8,0.01090239981810252
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,5120,fp8,0.010538666943709057
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,4096,fp8,0.011490133156379064
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,3584,fp8,0.010788266609112421
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,3072,fp8,0.010452266285816827
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,2560,fp8,0.008886400361855822
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,2048,fp8,0.00894079953432083
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,1536,fp8,0.009201066195964811
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,1024,fp8,0.010380800565083819
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,768,fp8,0.00943573365608851
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,512,fp8,0.009780266384283701
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,256,fp8,0.008556800335645674
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,128,fp8,0.009787733604510624
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,64,fp8,0.009496533373991648
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,32,fp8,0.009446399658918383
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,256,65536,fp8,0.014232534170150753
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,16384,fp8,0.012447999914487202
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,12288,fp8,0.011275733013947806
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,10240,fp8,0.011536000172297159
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,8192,fp8,0.010372266670068107
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,7168,fp8,0.011733333269755047
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,6144,fp8,0.010250666240851084
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,5120,fp8,0.0116266667842865
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,4096,fp8,0.010497067123651505
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,3584,fp8,0.010200533270835876
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,3072,fp8,0.008763733754555383
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,2560,fp8,0.009884800513585408
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,2048,fp8,0.009350400418043137
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,1536,fp8,0.0102634663383166
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,1024,fp8,0.010529066622257232
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,768,fp8,0.010739200065533321
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,512,fp8,0.00893759975830714
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,256,fp8,0.009293866157531737
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,128,fp8,0.007729066411654155
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,64,fp8,0.008211199939250947
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,32,fp8,0.009390933066606521
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,192,65536,fp8,0.013234134515126547
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,16384,fp8,0.011821866532166797
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,12288,fp8,0.012221866349379221
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,10240,fp8,0.010876799623171488
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,8192,fp8,0.010642133156458537
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,7168,fp8,0.010711466521024704
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,6144,fp8,0.011076266318559647
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,5120,fp8,0.010076799988746641
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,3584,fp8,0.010241066912810008
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,4096,fp8,0.011201066772143045
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,3072,fp8,0.009435733407735826
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,2560,fp8,0.010369066397349039
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,2048,fp8,0.009139200299978254
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,1536,fp8,0.010747733215490978
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,1024,fp8,0.009683199723561605
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,768,fp8,0.009297067175308864
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,512,fp8,0.01049813355008761
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,256,fp8,0.007912533481915791
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,128,fp8,0.007749333729346593
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,64,fp8,0.009377066791057586
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,32,fp8,0.00784640039006869
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,160,65536,fp8,0.013654399911562603
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,16384,fp8,0.011829332510630288
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,12288,fp8,0.010513066252072652
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,10240,fp8,0.01050133357445399
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,8192,fp8,0.010667733351389567
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,7168,fp8,0.012160000205039979
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,6144,fp8,0.009629866977532707
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,4096,fp8,0.010516266773144402
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,5120,fp8,0.009632000078757605
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,3584,fp8,0.009033599992593129
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,3072,fp8,0.011084800213575363
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,2560,fp8,0.010007466375827789
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,1536,fp8,0.011289600034554797
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,2048,fp8,0.009843199948469796
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,1024,fp8,0.00850346659620603
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,768,fp8,0.009640533725420633
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,512,fp8,0.008755199859539667
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,128,fp8,0.008343466371297837
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,256,fp8,0.00980373347798983
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,64,fp8,0.008371199915806453
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,32,fp8,0.009410133212804792
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,128,65536,fp8,0.011966932813326517
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,16384,fp8,0.010810666282971702
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,12288,fp8,0.010674133151769637
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,10240,fp8,0.0110773337384065
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,8192,fp8,0.008292266229788462
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,6144,fp8,0.00897173285484314
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,7168,fp8,0.010046933839718499
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,5120,fp8,0.009788800030946732
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,4096,fp8,0.009537066767613091
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,3584,fp8,0.009161600222190223
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,3072,fp8,0.009479467074076334
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,2560,fp8,0.009479467074076335
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,1536,fp8,0.01036586637298266
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,2048,fp8,0.00956373338898023
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,1024,fp8,0.007678933689991633
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,768,fp8,0.008860800166924795
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,512,fp8,0.008234666287899017
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,256,fp8,0.00853013296922048
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,128,fp8,0.0077183999121189124
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,32,fp8,0.0077792003750801085
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,64,fp8,0.009541333715120951
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,96,65536,fp8,0.013186132907867434
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,16384,fp8,0.011739733318487803
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,12288,fp8,0.010603733360767366
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,10240,fp8,0.011023999998966853
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,8192,fp8,0.009788800030946732
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,7168,fp8,0.010726400216420491
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,6144,fp8,0.010054399569829304
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,5120,fp8,0.009403733412424724
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,4096,fp8,0.009151999652385712
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,3072,fp8,0.009821866949399313
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,3584,fp8,0.009681066125631334
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,2560,fp8,0.009874132772286732
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,2048,fp8,0.008774399509032568
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,1536,fp8,0.009702400118112565
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,1024,fp8,0.009441066781679788
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,768,fp8,0.008989867071310678
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,512,fp8,0.009475200374921163
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,256,fp8,0.007726933310429256
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,128,fp8,0.00893653358022372
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,64,fp8,0.00864746669928233
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,32,fp8,0.00883200044433276
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,80,65536,fp8,0.012376533945401511
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,16384,fp8,0.012539732952912646
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,12288,fp8,0.008537600189447405
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,10240,fp8,0.0095701331893603
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,8192,fp8,0.011366400122642518
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,6144,fp8,0.010107733805974323
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,7168,fp8,0.009212800115346908
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,5120,fp8,0.009258667131265004
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,4096,fp8,0.010584532966216404
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,3584,fp8,0.009742933263381322
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,3072,fp8,0.009627733131249747
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,2560,fp8,0.009237333138783773
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,2048,fp8,0.00876480018099149
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,1536,fp8,0.009721600015958148
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,1024,fp8,0.009461333105961481
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,768,fp8,0.008101333677768708
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,512,fp8,0.008957867075999578
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,256,fp8,0.00869440014163653
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,128,fp8,0.00818026637037595
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,64,fp8,0.008119466652472814
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,32,fp8,0.008792533228794735
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,64,65536,fp8,0.011554134388764698
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,16384,fp8,0.010202666620413463
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,12288,fp8,0.009215999643007913
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,8192,fp8,0.00936853364109993
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,10240,fp8,0.010637866457303365
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,7168,fp8,0.009522133072217305
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,6144,fp8,0.010992000252008437
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,5120,fp8,0.009005866448084513
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,4096,fp8,0.009654400249322254
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,3584,fp8,0.009897599865992864
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,3072,fp8,0.008929066359996796
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,2560,fp8,0.010826667149861654
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,2048,fp8,0.00946239953239759
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,1024,fp8,0.0075840003788471225
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,1536,fp8,0.009938133259614308
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,768,fp8,0.008029866715272267
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,512,fp8,0.007934933652480443
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,256,fp8,0.007750400155782699
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,64,fp8,0.007720533261696497
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,128,fp8,0.008652799824873606
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,32,fp8,0.007829333841800689
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,16384,fp8,0.00992533341050148
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,48,65536,fp8,0.012742399672667186
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,12288,fp8,0.009307733674844105
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,10240,fp8,0.0102933332324028
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,8192,fp8,0.009602133681376773
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,6144,fp8,0.009418666611115139
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,7168,fp8,0.00913813312848409
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,5120,fp8,0.009447466830412548
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,4096,fp8,0.008677333345015845
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,3584,fp8,0.009321599453687667
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,3072,fp8,0.009850666920344035
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,2560,fp8,0.008407466610272725
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,1536,fp8,0.007947733253240585
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,2048,fp8,0.008341333270072937
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,1024,fp8,0.009676799674828847
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,768,fp8,0.009187199920415879
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,512,fp8,0.009510400394598642
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,128,fp8,0.008938666681448618
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,256,fp8,0.010088533163070679
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,64,fp8,0.007764266679684321
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,32,fp8,0.008177066842714943
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,32,65536,fp8,0.01153600017229716
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,16384,fp8,0.010647466282049814
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,12288,fp8,0.009431467205286025
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,10240,fp8,0.009525333096583686
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,8192,fp8,0.00781226654847463
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,7168,fp8,0.010887466371059418
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,6144,fp8,0.009921066214640936
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,5120,fp8,0.010091733684142432
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,4096,fp8,0.00881386622786522
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,3072,fp8,0.007959466924269994
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,3584,fp8,0.010528000195821126
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,2048,fp8,0.010258133212725323
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,2560,fp8,0.007885866115490596
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,1536,fp8,0.008938666433095932
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,1024,fp8,0.009417600433031718
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,512,fp8,0.008160000294446945
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,768,fp8,0.00811199943224589
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,256,fp8,0.008810666700204214
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,128,fp8,0.008252800256013871
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,64,fp8,0.007783466577529908
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,32,fp8,0.008754133681456248
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,16384,fp8,0.009844266374905904
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,16,65536,fp8,0.010614399860302607
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,12288,fp8,0.008644266178210576
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,10240,fp8,0.010622933010260266
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,8192,fp8,0.009595732887585957
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,7168,fp8,0.008794666826725006
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,5120,fp8,0.00777706652879715
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,6144,fp8,0.007767466455698013
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,4096,fp8,0.010586666564146675
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,3584,fp8,0.009105066458384195
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,3072,fp8,0.007798399527867635
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,2560,fp8,0.009235200534264248
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,2048,fp8,0.009785600254933038
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,1536,fp8,0.008042666067679723
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,1024,fp8,0.009556266417105993
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,768,fp8,0.009446399907271068
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,512,fp8,0.007807999849319459
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,256,fp8,0.00884373337030411
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,128,fp8,0.007773867001136143
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,64,fp8,0.008262400329113006
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,32,fp8,0.00795200044910113
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,8,65536,fp8,0.01041493316491445
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,16384,fp8,0.008817066997289657
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,10240,fp8,0.00776639978090922
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,12288,fp8,0.008088533580303193
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,7168,fp8,0.010140799979368845
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,6144,fp8,0.00792426640788714
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,5120,fp8,0.0077567999561627705
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,4096,fp8,0.009502933671077091
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,3584,fp8,0.0081365334490935
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,3072,fp8,0.008070400108893712
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,2560,fp8,0.008621866504351297
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,2048,fp8,0.009422933061917622
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,8192,fp8,0.008245333780845007
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,1536,fp8,0.009445333232482275
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,1024,fp8,0.009311999877293906
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,768,fp8,0.008214400211970011
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,512,fp8,0.007820799946784973
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,256,fp8,0.008918399612108868
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,128,fp8,0.007810133695602416
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,64,fp8,0.008273067077000935
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,32,fp8,0.007847467064857483
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,16384,fp8,0.009433600306510925
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,4,65536,fp8,0.011593599865833918
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,12288,fp8,0.009836799899737039
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,10240,fp8,0.008472533524036409
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,8192,fp8,0.008132266749938328
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,7168,fp8,0.007980799674987793
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,6144,fp8,0.007700267185767492
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,5120,fp8,0.00936639979481697
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,4096,fp8,0.009393066416184107
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,3584,fp8,0.009085866808891296
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,3072,fp8,0.008134399602810542
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,2560,fp8,0.00944213370482127
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,2048,fp8,0.007902933657169342
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,1536,fp8,0.007695999989906947
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,1024,fp8,0.008220799763997396
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,768,fp8,0.007812266796827317
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,512,fp8,0.007854933540026348
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,256,fp8,0.008831999947627386
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,128,fp8,0.007810133695602416
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,64,fp8,0.007748266806205114
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,32,fp8,0.008145067095756532
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,2,65536,fp8,0.00958506688475609
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,16384,fp8,0.009323733299970627
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,10240,fp8,0.008180267115434011
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,12288,fp8,0.007966932902733485
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,8192,fp8,0.00935893307129542
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,7168,fp8,0.008126933872699738
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,6144,fp8,0.0095551997423172
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,5120,fp8,0.008173866818348564
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,3584,fp8,0.008185600241025288
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,4096,fp8,0.009192533294359843
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,3072,fp8,0.007956266651550928
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,2560,fp8,0.009239466240008671
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,2048,fp8,0.007735466708739599
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,1536,fp8,0.007814400146404901
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,1024,fp8,0.00882133369644483
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,768,fp8,0.007842133442560833
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,512,fp8,0.007782400151093801
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,256,fp8,0.008236800134181975
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,128,fp8,0.007789866377909979
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,64,fp8,0.007862400263547897
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,32,fp8,0.008982399602731068
TRTLLM,1.1.0,NVIDIA GB200,compute_scale,torch_ops,1,65536,fp8,0.007572266459465027
