framework,version,device,op_name,kernel_source,m,k,quant_dtype,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,12288,fp8,0.0658592065175374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,16384,fp8,0.08684159914652506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,10240,fp8,0.054986667633056624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,8192,fp8,0.04261439641316733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,5120,fp8,0.0172917366027832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,6144,fp8,0.028865067164103198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,7168,fp8,0.03667946656545003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,4096,fp8,0.022820270061492932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,3072,fp8,0.018935465812683107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,3584,fp8,0.026336002349853525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,2048,fp8,0.013958400487899782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,1536,fp8,0.012890666723251345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,2560,fp8,0.016721065839131674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,1024,fp8,0.011831467350323994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,512,fp8,0.011133867502212524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,768,fp8,0.013566933075586955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,256,fp8,0.010229333738485974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,128,fp8,0.011073066790898641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,64,fp8,0.009543466816345849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,32,fp8,0.009560533861319226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,12288,fp8,0.02923200130462647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,16384,fp8,0.043971188863118515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,8192,fp8,0.0204746683438619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,10240,fp8,0.016248536109924314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,7168,fp8,0.021457068125406896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,6144,fp8,0.01849066615104675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,4096,fp8,0.014356265465418495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,5120,fp8,0.016427733500798545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,3072,fp8,0.013908266027768453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,3584,fp8,0.014108798901240033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,2048,fp8,0.012074666221936543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,2560,fp8,0.013516799608866373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,1024,fp8,0.011865598956743877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,1536,fp8,0.011853866775830587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,512,fp8,0.010057600339253743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,768,fp8,0.01202026555935542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,256,fp8,0.011095466961463293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,128,fp8,0.009637332955996196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,64,fp8,0.009550932794809341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,32,fp8,0.010072533041238785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,16384,fp8,0.02285546859105428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,12288,fp8,0.01852800051371256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,10240,fp8,0.015434664487838746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8192,65536,fp8,0.32220478057861324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,8192,fp8,0.014472534259160359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,7168,fp8,0.01401173273722331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,5120,fp8,0.013970133662223818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,6144,fp8,0.013526400923728945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4096,65536,fp8,0.16548690795898446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,4096,fp8,0.012066133817036946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,3584,fp8,0.013109333316485087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,3072,fp8,0.011469866832097372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,2560,fp8,0.012746666371822358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,2048,fp8,0.012684799234072366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,1536,fp8,0.0120128000775973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,512,fp8,0.010640000551939012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,768,fp8,0.010458666582902273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,1024,fp8,0.01180800000826518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,128,fp8,0.010304000228643417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,64,fp8,0.009419733782609303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,256,fp8,0.009446399907271067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,32,fp8,0.007441066453854243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,16384,fp8,0.014748797814051313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,12288,fp8,0.013190399607022604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,10240,fp8,0.013547732432683306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,8192,fp8,0.012129067381223043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,7168,fp8,0.012766933441162108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,6144,fp8,0.012052266796429952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,4096,fp8,0.009976532558600109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,3584,fp8,0.012344534198443096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,3072,fp8,0.011696000397205352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,2560,fp8,0.011763200660546622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,2048,fp8,0.01140053321917852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,1536,fp8,0.010677333672841391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,1024,fp8,0.010685867071151732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,768,fp8,0.01065066655476888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,512,fp8,0.010785067081451417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,256,fp8,0.009340799848238625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,128,fp8,0.009350400169690451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2048,65536,fp8,0.0870677312215169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,5120,fp8,0.010958932340145111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,64,fp8,0.008091733604669572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,32,fp8,0.009435733159383137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,12288,fp8,0.012827733159065248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,10240,fp8,0.012713600198427837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,16384,fp8,0.0140064001083374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,8192,fp8,0.012328532338142396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,7168,fp8,0.011732266346613566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,6144,fp8,0.010755200684070588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,5120,fp8,0.010769066214561463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,4096,fp8,0.011765334010124206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,3584,fp8,0.011700266102949778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1024,65536,fp8,0.04469332695007325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,3072,fp8,0.011623467504978181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,2560,fp8,0.011688533425331115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,2048,fp8,0.011245866119861602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,1536,fp8,0.010789333532253902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,768,fp8,0.009132800251245498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,1024,fp8,0.008793599903583527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,512,fp8,0.009204266468683877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,256,fp8,0.009979733576377235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,128,fp8,0.011032533148924509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,64,fp8,0.00811520020167033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,32,fp8,0.00787733346223831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,12288,fp8,0.013274666666984557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,16384,fp8,0.011898666620254518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,8192,fp8,0.009933867553869885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,10240,fp8,0.01174826622009277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,7168,fp8,0.012058667341868083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,6144,fp8,0.012712532778580981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,768,65536,fp8,0.029582929611206052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,5120,fp8,0.011294933160146077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,4096,fp8,0.012023467322190604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,3584,fp8,0.010315733651320139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,3072,fp8,0.010620800654093425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,2560,fp8,0.010433066884676616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,2048,fp8,0.010678400099277495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,1536,fp8,0.009201066941022872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,1024,fp8,0.009141332904497783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,768,fp8,0.00911253293355306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,512,fp8,0.008359466989835104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,256,fp8,0.009465599805116652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,128,fp8,0.00777280032634735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,64,fp8,0.008922666311264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,32,fp8,0.008258132884899776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,12288,fp8,0.011531733969847363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,16384,fp8,0.012627201279004414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,10240,fp8,0.0114570677280426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,8192,fp8,0.011548800269762675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,512,65536,fp8,0.022481067975362146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,7168,fp8,0.011592533687750498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,5120,fp8,0.010376533369223279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,6144,fp8,0.011120000481605528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,4096,fp8,0.011021866897741952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,3584,fp8,0.01031466672817866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,3072,fp8,0.010646400352319082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,2560,fp8,0.010312533130248389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,2048,fp8,0.00919146711627642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,1536,fp8,0.009890133142471313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,1024,fp8,0.009605333209037782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,768,fp8,0.008185599744319916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,512,fp8,0.008007466544707615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,256,fp8,0.009234132866064705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,128,fp8,0.007765333602825801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,64,fp8,0.008255999783674875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,32,fp8,0.008211200187603632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,16384,fp8,0.011609600484371183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,384,65536,fp8,0.01853226621945699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,12288,fp8,0.012529067198435467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,10240,fp8,0.010615466535091399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,8192,fp8,0.011470933258533477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,7168,fp8,0.010690133273601533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,6144,fp8,0.010303999980290728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,5120,fp8,0.01034879982471466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,4096,fp8,0.011323733379443488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,3584,fp8,0.010083200285832088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,3072,fp8,0.010328533003727595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,2560,fp8,0.010498133053382238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,2048,fp8,0.009052799642086029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,1536,fp8,0.009549866616725921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,1024,fp8,0.010684799899657567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,768,fp8,0.01085973357160886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,512,fp8,0.0074709333479404455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,256,fp8,0.009689600268999737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,128,fp8,0.007881599913040797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,64,fp8,0.00842026670773824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,32,fp8,0.008321067194143932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,16384,fp8,0.011228800316651664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,12288,fp8,0.01105279972155889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,256,65536,fp8,0.015687465667724606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,10240,fp8,0.011171199878056844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,8192,fp8,0.011738666892051696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,7168,fp8,0.010656000673770904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,6144,fp8,0.010878933717807134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,5120,fp8,0.010456533233324686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,4096,fp8,0.009769599636395771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,3584,fp8,0.009367466221253078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,3072,fp8,0.009392000238100686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,2560,fp8,0.009257599463065464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,2048,fp8,0.010194133222103118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,1536,fp8,0.010771200060844421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,1024,fp8,0.008087466408809026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,768,fp8,0.008078933755556742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,512,fp8,0.008114133030176163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,256,fp8,0.00797973374525706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,128,fp8,0.00785386711359024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,64,fp8,0.007874133189519246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,32,fp8,0.007823999722798666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,16384,fp8,0.011916800836722056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,192,65536,fp8,0.01358933349450429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,12288,fp8,0.01129386673370997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,10240,fp8,0.011350399752457937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,8192,fp8,0.011678933600584668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,7168,fp8,0.011094399789969126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,5120,fp8,0.009441066781679788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,6144,fp8,0.010801066458225251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,4096,fp8,0.009242666512727737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,3584,fp8,0.009705599894126256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,3072,fp8,0.009149866551160813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,2560,fp8,0.008662400394678117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,2048,fp8,0.009284266829490661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,1536,fp8,0.00937599961956342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,1024,fp8,0.00757973367969195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,768,fp8,0.008001066247622172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,512,fp8,0.009910400460163754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,256,fp8,0.00924906680981318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,128,fp8,0.007797333349784215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,64,fp8,0.008313599973917007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,32,fp8,0.007800533125797908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,160,65536,fp8,0.013928532600402829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,16384,fp8,0.011584000289440153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,12288,fp8,0.011195733149846395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,10240,fp8,0.010193066298961639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,8192,fp8,0.0114506666858991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,7168,fp8,0.010401066641012827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,6144,fp8,0.01035626654823621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,5120,fp8,0.009250132987896603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,4096,fp8,0.009519999970992407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,3584,fp8,0.009738666315873463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,3072,fp8,0.010492799679438274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,2560,fp8,0.009559466441472372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,2048,fp8,0.010391467064619065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,1536,fp8,0.010152533402045569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,1024,fp8,0.010466133058071137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,768,fp8,0.009824000298976898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,512,fp8,0.008444799979527792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,256,fp8,0.007975466797749202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,128,fp8,0.008091733356316884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,64,fp8,0.008129066973924638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,32,fp8,0.008745600531498591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,128,65536,fp8,0.012010667721430458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,16384,fp8,0.01038933296998342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,12288,fp8,0.011764266838630041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,10240,fp8,0.011117866883675259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,8192,fp8,0.008877866715192795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,7168,fp8,0.010377600292364755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,6144,fp8,0.010878933717807135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,5120,fp8,0.009323733548323315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,4096,fp8,0.009810133030017218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,3584,fp8,0.009606399883826573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,3072,fp8,0.00938346708814303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,2560,fp8,0.009382400413354239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,2048,fp8,0.008861866345008216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,1536,fp8,0.010274133086204527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,1024,fp8,0.008189866195122401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,768,fp8,0.009402666240930557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,512,fp8,0.007796266923348109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,256,fp8,0.008474666625261307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,128,fp8,0.007701333363850911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,64,fp8,0.009252266585826875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,32,fp8,0.008201599617799123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,96,65536,fp8,0.012123732765515646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,16384,fp8,0.010343466699123383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,10240,fp8,0.008716800063848494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,12288,fp8,0.010748799890279772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,8192,fp8,0.010042666892210641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,7168,fp8,0.009151999652385712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,6144,fp8,0.00930240030090014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,5120,fp8,0.009683199723561605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,4096,fp8,0.009482666850090027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,3584,fp8,0.009684266398350396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,3072,fp8,0.00941226656238238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,2560,fp8,0.009487999478975932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,2048,fp8,0.008601599683364233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,1536,fp8,0.012089600414037706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,1024,fp8,0.008423466235399246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,768,fp8,0.008918399860461553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,512,fp8,0.008545066912968953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,256,fp8,0.007815466324488322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,128,fp8,0.009389866640170413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,64,fp8,0.008931199957927068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,32,fp8,0.007719466586907704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,16384,fp8,0.010517332951227823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,80,65536,fp8,0.011174399654070535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,12288,fp8,0.00896640047430992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,10240,fp8,0.009178666522105534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,8192,fp8,0.010651733726263046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,7168,fp8,0.009091200431187947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,6144,fp8,0.009508266548315684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,5120,fp8,0.009468800326188406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,4096,fp8,0.009282132983207703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,3584,fp8,0.008840533594290417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,3072,fp8,0.009878400216499966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,2560,fp8,0.010222932944695154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,2048,fp8,0.009627733379602432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,1536,fp8,0.009320533027251561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,1024,fp8,0.007789866377909977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,768,fp8,0.008756266782681148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,512,fp8,0.009811200201511383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,256,fp8,0.007948799431324005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,128,fp8,0.00823253368337949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,64,fp8,0.008133333673079808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,32,fp8,0.007800533374150594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,64,65536,fp8,0.011596799890200299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,16384,fp8,0.00968639999628067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,12288,fp8,0.009628799806038538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,10240,fp8,0.009155199676752091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,8192,fp8,0.010886399696270622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,7168,fp8,0.009445333729187648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,6144,fp8,0.008689067016045254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,5120,fp8,0.009718400488297146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,4096,fp8,0.00902613326907158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,3072,fp8,0.008069333185752234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,3584,fp8,0.009888000289599099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,2560,fp8,0.008820266276597976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,2048,fp8,0.00956799959143003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,1536,fp8,0.009540267040332158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,1024,fp8,0.007893333584070208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,768,fp8,0.007739733159542085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,512,fp8,0.008504533519347508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,256,fp8,0.007817599922418594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,128,fp8,0.008052266637484233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,64,fp8,0.00796906699736913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,32,fp8,0.008052266885836918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,16384,fp8,0.009810133278369904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,48,65536,fp8,0.011105066041151683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,12288,fp8,0.009730133910973866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,10240,fp8,0.009604266534248986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,8192,fp8,0.00944959968328476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,7168,fp8,0.009058133512735366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,6144,fp8,0.009438933432102203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,5120,fp8,0.007659733295440674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,4096,fp8,0.010793599734703702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,3584,fp8,0.009563732892274857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,3072,fp8,0.009273600329955419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,2560,fp8,0.008683733145395913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,2048,fp8,0.009317333002885182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,1536,fp8,0.008025600016117096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,1024,fp8,0.009602132936318716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,768,fp8,0.007845332970221837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,256,fp8,0.007766399532556534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,512,fp8,0.008212266365687053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,128,fp8,0.007785600423812866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,64,fp8,0.008081066111723583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,32,fp8,0.00812693362434705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,32,65536,fp8,0.011313066879908245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,16384,fp8,0.009620266656080883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,12288,fp8,0.009289599706729252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,10240,fp8,0.010239999989668527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,8192,fp8,0.009333333124717076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,7168,fp8,0.008458666255076727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,6144,fp8,0.009920000036557516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,5120,fp8,0.009268266956011455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,4096,fp8,0.008950399855772654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,3584,fp8,0.009495466450850167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,3072,fp8,0.008866133044163386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,2560,fp8,0.007715199639399846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,2048,fp8,0.010531199971834817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,1536,fp8,0.007837866495052973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,1024,fp8,0.007924266656239826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,768,fp8,0.007795199751853943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,512,fp8,0.00914133315285047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,256,fp8,0.007755733529726664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,128,fp8,0.008268799632787704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,32,fp8,0.008150399724642435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,64,fp8,0.007809066772460937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,16,65536,fp8,0.010661333054304122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,16384,fp8,0.007452800124883652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,10240,fp8,0.01016746684908867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,8192,fp8,0.0077855996787548065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,12288,fp8,0.008515199770530064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,7168,fp8,0.009708799918492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,6144,fp8,0.007746133208274841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,5120,fp8,0.008022399495045344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,4096,fp8,0.009689600268999735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,3584,fp8,0.008273066580295562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,2560,fp8,0.008293333897988003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,2048,fp8,0.008538666119178136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,3072,fp8,0.007831466446320216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,1536,fp8,0.007781333227952321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,1024,fp8,0.00830613300204277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,768,fp8,0.007926400005817413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,512,fp8,0.008622933179140091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,256,fp8,0.007816532999277113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,64,fp8,0.008056533336639403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,32,fp8,0.007828266173601151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,128,fp8,0.007900800059239068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,8,65536,fp8,0.010075733562310535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,16384,fp8,0.00788053348660469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,12288,fp8,0.008669866373141606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,10240,fp8,0.008367999891440074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,8192,fp8,0.010300800204277039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,6144,fp8,0.007811199873685837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,5120,fp8,0.007815467069546382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,7168,fp8,0.00804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,4096,fp8,0.00901866654555003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,3584,fp8,0.007765333851178487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,3072,fp8,0.00806079978744189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,2560,fp8,0.009101866434017818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,2048,fp8,0.007750399907430012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,1024,fp8,0.008254933605591456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,768,fp8,0.007781333724657694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,1536,fp8,0.007968000570933022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,512,fp8,0.008111999928951263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,256,fp8,0.008203732967376708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,128,fp8,0.007857066889603933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,64,fp8,0.008157867193222045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,32,fp8,0.007709866762161255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,4,65536,fp8,0.00882133295138677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,16384,fp8,0.009964800129334131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,12288,fp8,0.007796266923348109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,10240,fp8,0.00791999970873197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,8192,fp8,0.008017066866159439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,7168,fp8,0.007714132964611054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,6144,fp8,0.007786667098601659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,5120,fp8,0.007756800204515457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,4096,fp8,0.009031466643015545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,3584,fp8,0.00785706639289856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,3072,fp8,0.00825813338160515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,2560,fp8,0.007769600053628286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,2048,fp8,0.007812266796827317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,1536,fp8,0.007753599931796392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,1024,fp8,0.007857066889603933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,768,fp8,0.007808000594377518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,512,fp8,0.007779200375080108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,256,fp8,0.007742933680613835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,128,fp8,0.008133333176374435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,64,fp8,0.007784533749024073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,32,fp8,0.008132266998291015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,2,65536,fp8,0.007378132889668147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,16384,fp8,0.007994666695594788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,12288,fp8,0.007823999971151352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,10240,fp8,0.00782719999551773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,8192,fp8,0.007787733028332392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,7168,fp8,0.007795200248559316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,6144,fp8,0.007754666606585184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,5120,fp8,0.00784853349129359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,4096,fp8,0.007769599556922913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,3584,fp8,0.007848532994588216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,3072,fp8,0.007708800087372462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,2560,fp8,0.007788799454768498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,2048,fp8,0.0078005328774452196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,1536,fp8,0.008213332792123159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,1024,fp8,0.007822933544715246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,768,fp8,0.008107733726501466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,512,fp8,0.007735466708739598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,256,fp8,0.007787733276685079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,128,fp8,0.00781226654847463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,64,fp8,0.008186666419108708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,32,fp8,0.0077621333301067345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,compute_scale,torch_ops,1,65536,fp8,0.00787946656346321
