framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,1,2,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,1,8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,1,4,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,1,1,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,1,16,0,0.0143306665122509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,1,32,0,0.014650666465361914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,1,128,0,0.013616000612576803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,1,64,0,0.014671999961137772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,1,4,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,1,8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,1,1,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,1,2,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,1,16,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,1,32,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,1,64,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,1,128,0,0.017605333278576534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,16,2,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,16,1,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,16,8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,16,4,0,0.015893333901961643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,16,32,0,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,16,16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,16,128,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,16,64,0,0.01422400027513504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,16,1,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,16,2,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,16,4,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,16,8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,16,16,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,16,32,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,16,64,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,16,128,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,32,1,0,0.015861333658297855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,32,2,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,32,8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,32,4,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,32,16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,32,32,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,32,128,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,32,64,0,0.013232000172138214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,32,1,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,32,2,0,0.017973333597183228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,32,8,0,0.01766933376590411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,32,4,0,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,32,16,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,32,32,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,32,64,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,32,128,0,0.017717332889636356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,64,2,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,64,1,0,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,64,4,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,64,8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,64,16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,64,32,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,64,64,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,64,1,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,64,128,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,64,2,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,64,4,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,64,8,0,0.021712000171343487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,64,16,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,64,32,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,64,64,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,64,128,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,128,1,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,128,2,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,128,4,0,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,128,8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,128,16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,128,32,0,0.016127999871969223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,128,64,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,128,128,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,128,1,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,128,2,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,128,4,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,128,8,0,0.02178666740655899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,128,16,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,128,32,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,128,64,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,128,128,0,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,256,1,0,0.022261333962281544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,256,2,0,0.02000533292690913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,256,4,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,256,8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,256,16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,256,64,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,256,32,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,256,128,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,256,1,0,0.027935999135176342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,256,2,0,0.02593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,256,4,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,256,8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,256,16,0,0.023728000621000927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,256,32,0,0.02380266785621643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,256,64,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,256,128,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,512,1,0,0.03934400031963984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,512,2,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,512,4,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,512,8,0,0.021829334398110706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,512,16,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,512,32,0,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,512,128,0,0.021530665457248688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,512,64,0,0.021695998807748158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,512,1,0,0.05761066575845083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,512,2,0,0.03032533327738444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,512,4,0,0.027786667148272198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,512,8,0,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,512,16,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,512,32,0,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,512,64,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,512,128,0,0.023946667710940044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,1024,1,0,0.08158933122952779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,1024,2,0,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,1024,4,0,0.030960001051425934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,1024,8,0,0.02815466622511546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,1024,16,0,0.028016000986099243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,1024,32,0,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,1024,64,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,1024,128,0,0.028223998844623566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,1024,1,0,0.10781332850456238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,1024,2,0,0.06460799773534139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,1024,8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,1024,4,0,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,1024,16,0,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,1024,32,0,0.030389333764712017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,1024,64,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,1024,128,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,1536,1,0,0.13268267114957175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,1536,2,0,0.08203200002511342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,1536,4,0,0.05221866567929586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,1536,8,0,0.034474665919939675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,1536,16,0,0.034287999073664345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,1536,32,0,0.03349333256483078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,1536,64,0,0.032458665470282234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,1536,128,0,0.03215999901294708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,1536,1,0,0.16665599743525186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,1536,2,0,0.09870400031407674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,1536,4,0,0.0565226674079895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,1536,8,0,0.03875199953715006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,1536,16,0,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,1536,64,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,1536,32,0,0.03391999999682108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,1536,128,0,0.033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,2048,1,0,0.19573867321014404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,2048,4,0,0.07411733269691467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,2048,2,0,0.11587733030319214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,2048,8,0,0.04205333193143209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,2048,16,0,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,2048,32,0,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,2048,64,0,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,2048,128,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,2048,1,0,0.2290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,2048,2,0,0.13180800278981528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,2048,4,0,0.07962133487065633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,2048,8,0,0.04435733457406362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,2048,16,0,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,2048,32,0,0.03957333415746689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,2048,64,0,0.0384853333234787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,2048,128,0,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,3072,2,0,0.20020800828933716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,3072,1,0,0.35753067334493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,3072,8,0,0.0757013310988744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,3072,4,0,0.12587199608484903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,3072,16,0,0.052015999952952065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,3072,32,0,0.05053866902987162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,3072,64,0,0.050554667909940086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,3072,128,0,0.050101334849993386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,3072,2,0,0.21233065923055014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,3072,1,0,0.38181865215301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,3072,4,0,0.12398933370908101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,3072,8,0,0.07565333445866902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,3072,16,0,0.050437331199645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,3072,32,0,0.048581331968307495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,3072,64,0,0.0479360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,3072,128,0,0.048021331429481506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,4096,1,0,0.5624053478240967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,4096,4,0,0.18132799863815308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,4096,2,0,0.3084266583124797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,4096,8,0,0.11657599608103435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,4096,16,0,0.06649599969387054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,4096,32,0,0.06211733321348826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,4096,64,0,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,4096,128,0,0.06071466704209646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,4096,2,0,0.3054719964663188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,4096,1,0,0.5661813418070475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,4096,4,0,0.17494400342305502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,4096,8,0,0.10566400488217671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,4096,16,0,0.06250133117039998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,4096,32,0,0.058464000622431435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,4096,64,0,0.058245331048965454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,4096,128,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,6144,8,0,0.2037013371785482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,6144,2,0,0.5942720174789429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,6144,4,0,0.3306666612625122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,6144,1,0,1.0991520086924236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,6144,32,0,0.08708799878756206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,6144,16,0,0.11926399668057759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,6144,64,0,0.08471999565760295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,6144,128,0,0.08330133557319641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,6144,4,0,0.2990880012512207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,6144,2,0,0.538810650507609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,6144,1,0,1.015226682027181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,6144,8,0,0.17707200845082602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,6144,16,0,0.10666666428248088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,6144,32,0,0.07762666543324788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,6144,64,0,0.07549333572387695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,6144,128,0,0.0735040009021759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,8192,4,0,0.523904005686442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,8192,1,0,1.828938643137614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,8192,2,0,0.9584106604258219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,8192,8,0,0.3056266705195109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,8192,16,0,0.20479466517766318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,8192,32,0,0.11242666840553284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,8192,64,0,0.1093280017375946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,8192,128,0,0.10771733522415161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,8192,4,0,0.44994668165842694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,8192,1,0,1.5725760459899902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,8192,2,0,0.822154680887858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,8192,16,0,0.15829333662986755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,8192,8,0,0.25923200448354083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,8192,64,0,0.09325333436330159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,8192,32,0,0.09777067104975383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,8192,128,0,0.09126399954160054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,10240,4,0,0.7571732997894287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,10240,8,0,0.43010131518046063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,10240,2,0,1.4054773648579915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,10240,16,0,0.27249600489934284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,10240,32,0,0.15772266189257303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,10240,1,0,2.69816525777181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,10240,64,0,0.1317759950955709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,10240,128,0,0.12973333398501077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,10240,4,0,0.6234506766001383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,10240,8,0,0.35464000701904297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,10240,2,0,1.1589439709981282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,10240,16,0,0.2157706618309021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,10240,1,0,2.2492693265279136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,10240,32,0,0.12626666824022928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,10240,64,0,0.11008000373840332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,10240,128,0,0.10899200042088826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,12288,8,0,0.5721760193506876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,12288,4,0,1.0166079998016357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,12288,2,0,1.9342026710510254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,12288,16,0,0.3498773177464803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,12288,32,0,0.20994667212168375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,12288,64,0,0.18202666441599527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,12288,1,0,3.9735679626464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,12288,128,0,0.1524853308995565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,12288,8,0,0.4598666826883952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,12288,4,0,0.8238186836242676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,12288,2,0,1.5517813364664714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,12288,16,0,0.27772800127665204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,12288,1,0,3.037013371785482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,12288,64,0,0.1309386690457662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,12288,32,0,0.17315733432769775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,12288,128,0,0.12619733810424805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,16384,8,0,0.9151146411895752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,16384,4,0,1.682949384053548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,16384,16,0,0.5252746740976969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,16384,32,0,0.3349546591440837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,16384,2,0,3.3550774256388345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,16384,64,0,0.2504533330599467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,16384,128,0,0.2046026587486267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,16384,1,0,6.855487823486328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,16384,4,0,1.3005653222401936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,16384,2,0,2.508080005645752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,16384,8,0,0.8343359629313151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,16384,16,0,0.41678933302561444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,16384,32,0,0.25992000102996826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,16384,128,0,0.1606826682885488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,16384,64,0,0.1682186722755432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,16384,1,0,5.003439903259277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,1,32768,8,0,3.1420907974243164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,1,32768,4,0,6.506997426350911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,1,32768,16,0,1.6953333218892415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,1,32768,64,0,0.7493226528167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,1,32768,32,0,0.9675892988840739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,1,32768,128,0,0.5012266635894775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,1,32768,2,0,12.833882649739584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,1,32768,4,0,4.553365389506022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,1,32768,2,0,8.91647466023763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,1,32768,16,0,1.2367946306864421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,1,32768,8,0,2.2782932917277017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,1,32768,64,0,0.4533653259277344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,1,32768,32,0,0.723082701365153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,1,1,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,1,32768,128,0,0.31031467517217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,1,2,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,1,4,0,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,1,32768,1,0,27.065210978190105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,1,8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,1,16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,1,32768,1,0,17.701343536376953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,1,32,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,1,64,0,0.014202666779359182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,1,128,0,0.014106666048367819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,1,2,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,1,1,0,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,1,4,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,1,8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,1,16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,1,32,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,1,64,0,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,1,128,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,16,1,0,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,16,2,0,0.01580799991885821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,16,4,0,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,16,8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,16,16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,16,32,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,16,64,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,16,128,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,16,1,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,16,2,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,16,4,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,16,8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,16,16,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,16,32,0,0.017802666872739792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,16,64,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,16,128,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,32,1,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,32,2,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,32,4,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,32,8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,32,16,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,32,32,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,32,64,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,32,128,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,32,1,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,32,2,0,0.020303999384244282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,32,4,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,32,8,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,32,16,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,32,64,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,32,128,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,32,32,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,64,1,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,64,4,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,64,2,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,64,8,0,0.01605333387851715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,64,16,0,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,64,64,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,64,32,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,64,128,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,64,1,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,64,4,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,64,2,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,64,8,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,64,16,0,0.02056533346573512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,64,32,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,64,64,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,64,128,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,128,1,0,0.022240000466505688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,128,2,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,128,4,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,128,8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,128,16,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,128,32,0,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,128,64,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,128,128,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,128,1,0,0.031114667654037476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,128,2,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,128,4,0,0.021738665799299877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,128,8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,128,16,0,0.02165333429972331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,128,128,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,128,64,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,128,32,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,256,1,0,0.03120533376932144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,256,4,0,0.019813333948453266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,256,2,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,256,8,0,0.02038399999340375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,256,16,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,256,32,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,256,64,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,256,128,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,256,1,0,0.05272533496220907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,256,2,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,256,8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,256,4,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,256,16,0,0.024864000578721363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,256,32,0,0.023946667710940044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,256,64,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,256,128,0,0.02447466552257538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,512,1,0,0.06468800206979115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,512,2,0,0.03882666677236557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,512,4,0,0.02607999990383784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,512,8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,512,16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,512,32,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,512,64,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,512,128,0,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,512,1,0,0.09496000409126282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,512,2,0,0.056186666091283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,512,4,0,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,512,8,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,512,16,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,512,32,0,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,512,64,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,512,128,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,1024,1,0,0.13800000150998434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,1024,2,0,0.0809440016746521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,1024,4,0,0.04982399940490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,1024,8,0,0.032298666735490165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,1024,16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,1024,32,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,1024,64,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,1024,128,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,1024,1,0,0.18762133518854776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,1024,4,0,0.0656160016854604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,1024,2,0,0.1074773371219635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,1024,8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,1024,16,0,0.03140799949566523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,1024,32,0,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,1024,64,0,0.03202133377393087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,1024,128,0,0.029135999580224354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,1536,2,0,0.13209600249926248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,1536,1,0,0.23486934105555216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,1536,4,0,0.08197333415349324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,1536,8,0,0.05048533280690511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,1536,16,0,0.03606933355331421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,1536,32,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,1536,64,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,1536,128,0,0.03376533339420954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,1536,1,0,0.2966346740722656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,1536,2,0,0.16555200020472208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,1536,4,0,0.09689066807428996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,1536,8,0,0.05789866546789805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,1536,16,0,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,1536,32,0,0.03604800005753835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,1536,64,0,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,1536,128,0,0.03385066737731298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,2048,2,0,0.1946880022684733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,2048,1,0,0.3546559810638428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,2048,4,0,0.11636267105738322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,2048,8,0,0.07437866429487865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,2048,16,0,0.04186666508515676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,2048,32,0,0.040991999208927155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,2048,64,0,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,2048,128,0,0.03868266691764196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,2048,1,0,0.4182346661885579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,2048,2,0,0.227728009223938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,2048,4,0,0.13147200147310892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,2048,8,0,0.08104533453782399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,2048,16,0,0.046122665206591286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,2048,32,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,2048,64,0,0.04053333401679993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,2048,128,0,0.0402453343073527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,3072,1,0,0.6641546487808228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,3072,2,0,0.3591200113296509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,3072,8,0,0.12403733531634013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,3072,4,0,0.2014240026473999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,3072,16,0,0.07520533104737599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,3072,32,0,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,3072,64,0,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,3072,128,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,3072,1,0,0.7128907044728597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,3072,2,0,0.3797599871953328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,3072,4,0,0.2116640011469523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,3072,8,0,0.12398933370908101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,3072,32,0,0.050250664353370667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,3072,16,0,0.07429333527882893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,3072,128,0,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,3072,64,0,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,4096,4,0,0.3111093242963155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,4096,2,0,0.5643306573232015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,4096,8,0,0.1828426718711853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,4096,1,0,1.0653546651204426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,4096,16,0,0.11846400300661723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,4096,32,0,0.06715199848016103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,4096,64,0,0.06483200192451477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,4096,128,0,0.06227200229962667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,4096,8,0,0.17427200078964233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,4096,1,0,1.0843946933746338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,4096,4,0,0.3077546755472819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,4096,2,0,0.5632746616999308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,4096,16,0,0.10687466462453206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,4096,32,0,0.06437866886456807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,4096,128,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,4096,64,0,0.05894400179386139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,6144,8,0,0.3317546645800273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,6144,4,0,0.5946880181630453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,6144,2,0,1.1137333710988362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,6144,1,0,2.1428960164388022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,6144,16,0,0.20483734210332236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,6144,32,0,0.12823466459910074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,6144,64,0,0.09385066231091817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,6144,128,0,0.08619200189908345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,6144,4,0,0.5390880107879639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,6144,8,0,0.3001599907875061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,6144,2,0,1.011679967244466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,6144,1,0,1.969487984975179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,6144,16,0,0.1771199901898702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,6144,32,0,0.11174399654070537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,6144,64,0,0.07935466865698497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,6144,128,0,0.07709333300590515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,8192,4,0,0.9635519981384277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,8192,8,0,0.5247679948806763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,8192,2,0,1.8172213236490886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,8192,16,0,0.3147626717885335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,8192,32,0,0.19900800784428915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,8192,64,0,0.14112533132235208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,8192,1,0,3.6524108250935874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,8192,128,0,0.11382399996121724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,8192,4,0,0.8212479750315348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,8192,2,0,1.5713226000467937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,8192,8,0,0.44837868213653564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,8192,16,0,0.2595413327217102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,8192,1,0,3.0765759150187173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,8192,32,0,0.1627679963906606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,8192,64,0,0.10341333349545796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,8192,128,0,0.09634666641553243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,10240,4,0,1.399328072865804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,10240,2,0,2.756469408671061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,10240,8,0,0.7747893333435059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,10240,64,0,0.1750133236249288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,10240,32,0,0.27290133635203045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,10240,16,0,0.43355198701222736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,10240,1,0,5.699477513631185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,10240,128,0,0.1550986667474111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,10240,8,0,0.6215200026830038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,10240,4,0,1.1618133385976155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,10240,2,0,2.244277318318685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,10240,16,0,0.35596799850463867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,10240,32,0,0.21642667055130005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,10240,128,0,0.11550399661064148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,10240,64,0,0.13454932967821756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,10240,1,0,4.449322700500488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,12288,8,0,1.029968023300171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,12288,4,0,1.937712033589681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,12288,32,0,0.3518933455149333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,12288,16,0,0.5741333166758219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,12288,2,0,3.7891839345296225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,12288,64,0,0.2150719960530599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,12288,128,0,0.1985973318417867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,12288,1,0,8.097530364990234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,12288,4,0,1.550319989522298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,12288,2,0,3.01197878519694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,12288,16,0,0.46328532695770264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,12288,8,0,0.8249066670735677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,12288,64,0,0.17916800578435263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,12288,128,0,0.13460800051689148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,12288,32,0,0.2774453361829122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,12288,1,0,6.088154474894206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,16384,8,0,1.6780640284220378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,16384,4,0,3.30295467376709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,16384,16,0,0.9105226993560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,16384,32,0,0.531930685043335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,16384,64,0,0.3403466542561849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,16384,128,0,0.26852800448735553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,16384,2,0,6.839498519897461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,16384,4,0,2.4968746503194175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,16384,2,0,4.978373209635417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,16384,8,0,1.3080960114796956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,16384,1,0,13.632058461507162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,16384,32,0,0.4182240168253581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,16384,16,0,0.7130826314290365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,16384,64,0,0.26998400688171387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,16384,128,0,0.17876799901326498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,16384,1,0,10.021114349365234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,2,32768,8,0,6.0174560546875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,2,32768,16,0,3.25492795308431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,2,32768,4,0,12.969637552897135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,2,32768,32,0,1.6823840141296387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,2,32768,64,0,0.9837760130564371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,2,32768,128,0,0.659658670425415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,2,32768,2,0,27.008219401041668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,2,32768,2,0,17.698986053466797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,2,32768,4,0,8.977237065633139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,2,32768,16,0,2.3131786982218423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,2,32768,8,0,4.509141286214192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,2,32768,32,0,1.2556533018747966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,2,32768,64,0,0.7763306299845377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,1,1,0,0.02420266717672348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,2,32768,128,0,0.4671146472295125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,1,2,0,0.0205226664741834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,1,4,0,0.016437333077192307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,1,8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,1,16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,1,32,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,1,64,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,1,128,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,1,1,0,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,1,2,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,1,4,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,1,8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,1,16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,1,32,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,1,64,0,0.01803733284274737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,1,128,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,2,32768,1,0,52.744781494140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,16,1,0,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,2,32768,1,0,36.45079549153646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,16,2,0,0.019904000063737232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,16,8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,16,4,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,16,16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,16,64,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,16,128,0,0.017978666971127193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,16,32,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,16,1,0,0.028336000939210255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,16,2,0,0.024447999894618988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,16,4,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,16,8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,16,16,0,0.02038399999340375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,16,32,0,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,16,64,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,16,128,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,32,1,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,32,2,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,32,4,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,32,8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,32,16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,32,32,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,32,64,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,32,128,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,32,1,0,0.029898665845394135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,32,2,0,0.024218666056791942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,32,4,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,32,8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,32,16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,32,32,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,32,64,0,0.0204373337328434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,32,128,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,64,1,0,0.027829334139823914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,64,2,0,0.02021866664290428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,64,4,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,64,8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,64,16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,64,32,0,0.016735999534527462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,64,64,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,64,128,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,64,1,0,0.035743998984495796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,64,8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,64,2,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,64,4,0,0.022122666239738464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,64,16,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,64,64,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,64,32,0,0.019653332730134327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,64,128,0,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,128,1,0,0.03408000121514002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,128,4,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,128,2,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,128,8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,128,16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,128,32,0,0.016730666160583496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,128,64,0,0.016719999412695568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,128,128,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,128,1,0,0.05603733162085215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,128,4,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,128,2,0,0.029839999973773956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,128,8,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,128,16,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,128,32,0,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,128,64,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,128,128,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,256,1,0,0.05867200096448263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,256,2,0,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,256,4,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,256,16,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,256,8,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,256,32,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,256,64,0,0.017925333231687546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,256,128,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,256,1,0,0.08913066983222961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,256,2,0,0.05272533496220907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,256,4,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,256,8,0,0.025909334421157837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,256,32,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,256,64,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,256,16,0,0.023792001108328503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,256,128,0,0.024031999210516613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,512,1,0,0.10879466931025188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,512,4,0,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,512,2,0,0.0653599997361501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,512,8,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,512,16,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,512,32,0,0.022096000611782074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,512,64,0,0.021754667162895203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,512,128,0,0.021946666141351063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,512,1,0,0.16671999295552573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,512,2,0,0.09558932979901631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,512,8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,512,4,0,0.05840000013510386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,512,16,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,512,32,0,0.025749333202838898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,512,64,0,0.02717333287000656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,512,128,0,0.025973332424958546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,1024,1,0,0.25020267566045123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,1024,2,0,0.13844799995422363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,1024,4,0,0.08307200173536937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,1024,8,0,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,1024,16,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,1024,32,0,0.029743999242782593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,1024,64,0,0.0276853342851003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,1024,128,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,1024,1,0,0.34597333272298175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,1024,2,0,0.1892426609992981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,1024,4,0,0.10663466652234395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,1024,8,0,0.0668213317791621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,1024,16,0,0.03643733263015747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,1024,32,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,1024,64,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,1024,128,0,0.029359998802344005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,1536,1,0,0.43995734055836994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,1536,2,0,0.23498133818308511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,1536,4,0,0.13384000460306802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,1536,8,0,0.08322133123874664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,1536,16,0,0.05031999945640564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,1536,32,0,0.036090667049090065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,1536,64,0,0.035962666074434914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,1536,128,0,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,1536,1,0,0.558570663134257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,1536,2,0,0.29706666866938275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,1536,4,0,0.1651573379834493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,1536,8,0,0.0997759997844696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,1536,16,0,0.05971199770768484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,1536,32,0,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,1536,64,0,0.03610666592915853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,1536,128,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,2048,2,0,0.3593759934107463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,2048,1,0,0.6743466854095459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,2048,4,0,0.1973386605580648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,2048,8,0,0.11823999881744385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,2048,16,0,0.07674133280913036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,2048,32,0,0.044069334864616394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,2048,64,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,2048,128,0,0.04053333401679993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,2048,4,0,0.2283253272374471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,2048,2,0,0.4192533493041992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,2048,1,0,0.8038187026977539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,2048,8,0,0.13050132989883423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,2048,16,0,0.08183466891447704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,2048,32,0,0.04635733366012573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,2048,64,0,0.04229333500067393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,2048,128,0,0.04026666780312856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,3072,8,0,0.20196266969045004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,3072,4,0,0.3612266778945923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,3072,2,0,0.6758560339609782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,3072,1,0,1.2928799788157146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,3072,16,0,0.12341866890589397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,3072,32,0,0.08185066779454549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,3072,64,0,0.05690666536490122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,3072,128,0,0.05668266614278158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,3072,4,0,0.38065067927042645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,3072,8,0,0.21242133776346842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,3072,2,0,0.7144373257954916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,3072,1,0,1.385503927866618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,3072,16,0,0.1260373294353485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,3072,32,0,0.08082133531570435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,3072,64,0,0.05459199845790863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,3072,128,0,0.050383999943733215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,4096,8,0,0.30961066484451294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,4096,4,0,0.5703359842300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,4096,2,0,1.0761280059814453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,4096,16,0,0.1835520068804423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,4096,1,0,2.1081172625223794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,4096,32,0,0.12245866656303406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,4096,64,0,0.07902400195598602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,4096,128,0,0.06862933437029521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,4096,8,0,0.3062826593716939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,4096,4,0,0.5648746490478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,4096,2,0,1.083573341369629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,4096,32,0,0.11194133758544922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,4096,16,0,0.17711466550827026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,4096,1,0,2.1262453397115073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,4096,64,0,0.06863999863465627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,4096,128,0,0.0609386662642161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,6144,8,0,0.5978613297144572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,6144,4,0,1.1200106938680012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,6144,2,0,2.143141269683838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,6144,32,0,0.20939733584721884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,6144,16,0,0.33667198816935223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,6144,64,0,0.13198933005332947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,6144,128,0,0.1141973336537679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,6144,1,0,4.473861376444499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,6144,8,0,0.5396373271942139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,6144,4,0,1.0132426420847576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,6144,2,0,1.962015946706136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,6144,16,0,0.3036106626192729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,6144,32,0,0.18147200345993042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,6144,64,0,0.11716266473134358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,6144,1,0,3.8896106084187827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,6144,128,0,0.08534933129946391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,8192,8,0,0.9640586376190186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,8192,4,0,1.8489599227905273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,8192,16,0,0.5304586489995321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,8192,32,0,0.3277386625607808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,8192,2,0,3.625738779703776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,8192,64,0,0.20560532808303833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,8192,128,0,0.15123200416564941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,8192,1,0,7.459274927775065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,8192,4,0,1.5727519989013672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,8192,2,0,3.0797974268595376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,8192,16,0,0.4518666664759318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,8192,8,0,0.8672107060750326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,8192,32,0,0.2641599973042806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,8192,64,0,0.16902933518091837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,8192,128,0,0.11359467109044392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,8192,1,0,6.121029535929362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,10240,4,0,2.7853387196858725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,10240,8,0,1.4164533615112305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,10240,2,0,5.606645584106445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,10240,32,0,0.4401173194249471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,10240,16,0,0.7640853722890218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,10240,128,0,0.18529067436854044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,10240,64,0,0.282154659430186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,10240,1,0,11.424517313639322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,10240,8,0,1.1598827044169109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,10240,4,0,2.2392959594726562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,10240,2,0,4.463850657145183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,10240,16,0,0.6250933408737183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,10240,32,0,0.3619413375854492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,10240,128,0,0.14219733079274496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,10240,64,0,0.22423466046651205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,10240,1,0,8.972639719645182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,12288,4,0,3.8923412958780923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,12288,8,0,1.9445813496907551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,12288,16,0,1.0367413361867268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,12288,32,0,0.5829546848932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,12288,64,0,0.36452798048655194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,12288,128,0,0.224400003751119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,12288,2,0,8.017248153686523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,12288,4,0,3.0257813135782876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,12288,8,0,1.5521599451700847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,12288,2,0,6.08900260925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,12288,1,0,16.081748962402344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,12288,32,0,0.48870400587717694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,12288,16,0,0.839680035909017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,12288,64,0,0.2882293264071147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,12288,128,0,0.1888213356335958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,12288,1,0,12.14144515991211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,4,16384,4,0,6.814309438069661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,4,16384,8,0,3.3623199462890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,4,16384,32,0,0.9936746756235758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,4,16384,16,0,1.6975572903951008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,4,16384,64,0,0.5478399991989136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,4,16384,128,0,0.35154132048288983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,4,16384,2,0,13.827205657958984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,4,16384,4,0,5.066965421040853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,4,16384,2,0,10.063103993733725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,4,16384,8,0,2.506154696146647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,4,16384,16,0,1.3200266361236572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,4,16384,32,0,0.724010705947876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,4,16384,128,0,0.277839998404185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,4,16384,64,0,0.42749865849812824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,4,16384,1,0,28.619178771972656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,1,1,0,0.03244800120592117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,1,2,0,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,1,4,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,1,8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,1,16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,1,32,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,1,64,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,1,128,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,1,1,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,1,2,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,1,4,0,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,1,8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,4,16384,1,0,20.104048411051433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,1,16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,1,32,0,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,1,64,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,16,1,0,0.0322773332397143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,16,2,0,0.02459733436505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,1,128,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,16,4,0,0.018144000321626663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,16,8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,16,32,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,16,16,0,0.015850666910409927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,16,64,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,16,128,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,16,2,0,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,16,1,0,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,16,4,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,16,8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,16,16,0,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,16,32,0,0.02065066620707512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,16,64,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,16,128,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,32,1,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,32,2,0,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,32,4,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,32,8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,32,16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,32,32,0,0.01598400001724561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,32,64,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,32,128,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,32,1,0,0.040474665661652885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,32,2,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,32,8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,32,4,0,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,32,32,0,0.021685334543387096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,32,16,0,0.02091199904680252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,32,128,0,0.01987733319401741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,32,64,0,0.019999999552965164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,64,1,0,0.042037333051363625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,64,2,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,64,8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,64,4,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,64,32,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,64,16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,64,64,0,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,64,128,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,64,1,0,0.06276800235112508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,64,2,0,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,64,4,0,0.02584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,64,8,0,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,64,16,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,64,32,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,64,64,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,64,128,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,128,1,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,128,4,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,128,2,0,0.03457066665093104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,128,8,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,128,16,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,128,32,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,128,64,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,128,1,0,0.09072533249855042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,128,128,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,128,2,0,0.05604266623655955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,128,4,0,0.02995733420054118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,128,8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,128,16,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,128,32,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,128,128,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,128,64,0,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,256,1,0,0.09708266456921895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,256,4,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,256,2,0,0.05783999959627787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,256,8,0,0.02292799949645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,256,16,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,256,32,0,0.019834666202465694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,256,64,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,256,128,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,256,1,0,0.1534346640110016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,256,2,0,0.08921066919962566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,256,4,0,0.058687999844551086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,256,8,0,0.028416000306606293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,256,16,0,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,256,32,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,256,64,0,0.023711999257405598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,256,128,0,0.02460266649723053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,512,1,0,0.1977013349533081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,512,2,0,0.10935466488202412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,512,4,0,0.06577600042025249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,512,8,0,0.04010133445262909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,512,16,0,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,512,32,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,512,64,0,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,512,128,0,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,512,1,0,0.3036106626192729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,512,2,0,0.16683199008305868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,512,4,0,0.0951039989789327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,512,8,0,0.05895466605822245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,512,16,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,512,32,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,512,64,0,0.02762666592995326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,512,128,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,1024,2,0,0.252074658870697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,1024,1,0,0.4783253272374471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,1024,4,0,0.14040533701578775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,1024,8,0,0.08323200047016144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,1024,16,0,0.05663466453552246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,1024,32,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,1024,64,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,1024,128,0,0.029690665503342945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,1024,2,0,0.34568532307942706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,1024,1,0,0.6603999932607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,1024,4,0,0.1895786722501119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,1024,8,0,0.10770133137702942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,1024,16,0,0.06913599868615468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,1024,32,0,0.0363520011305809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,1024,128,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,1024,64,0,0.03483733286460241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,1536,8,0,0.1360213359196981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,1536,4,0,0.2368746598561605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,1536,2,0,0.44253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,1536,1,0,0.8490186532338461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,1536,16,0,0.08540266752243042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,1536,64,0,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,1536,32,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,1536,128,0,0.03748266647259394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,1536,8,0,0.16713066895802817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,1536,4,0,0.2964373429616292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,1536,2,0,0.5563786824544271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,1536,1,0,1.080357313156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,1536,16,0,0.101583997408549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,1536,32,0,0.06374399860699971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,1536,64,0,0.04016000032424927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,1536,128,0,0.038133333126703896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,2048,4,0,0.36152533690134686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,2048,2,0,0.6758826573689779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,2048,8,0,0.20113066832224527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,2048,1,0,1.3189120292663574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,2048,64,0,0.05204799771308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,2048,16,0,0.12052800258000691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,2048,32,0,0.08084799846013387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,2048,128,0,0.04443199932575226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,2048,4,0,0.4211626847585042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,2048,2,0,0.804645299911499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,2048,1,0,1.5704426765441895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,2048,8,0,0.2292799949645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,2048,16,0,0.13338133692741394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,2048,32,0,0.08540800213813782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,2048,64,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,2048,128,0,0.044309332966804504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,3072,4,0,0.6781493028004965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,3072,2,0,1.2892266909281414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,3072,8,0,0.3652160167694092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,3072,1,0,2.562021255493164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,3072,16,0,0.20690133174260458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,3072,32,0,0.12959999839464822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,3072,64,0,0.08801066875457764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,3072,128,0,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,3072,4,0,0.7153920332590739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,3072,2,0,1.3865866661071777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,3072,16,0,0.21781333287556967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,3072,8,0,0.3816479841868083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,3072,1,0,2.742506663004557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,3072,32,0,0.1304853359858195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,3072,64,0,0.08719467123349507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,3072,128,0,0.06021333237489065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,4096,8,0,0.5732479890187582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,4096,4,0,1.0814507007598877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,4096,2,0,2.089717388153076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,4096,16,0,0.31860800584157306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,4096,32,0,0.19267733891805014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,4096,64,0,0.12893333037694296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,4096,128,0,0.09267733494440715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,4096,1,0,4.33128007253011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,4096,4,0,1.0815093517303467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,4096,8,0,0.5661973158518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,4096,2,0,2.1175413131713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,4096,32,0,0.18227199713389078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,4096,16,0,0.3130506674448649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,4096,64,0,0.11683733264605205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,4096,128,0,0.07760533193747203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,4096,1,0,4.206255912780762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,6144,8,0,1.1245866616566975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,6144,4,0,2.1732053756713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,6144,16,0,0.6035199960072836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,6144,32,0,0.34411199887593585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,6144,2,0,4.471685409545898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,6144,64,0,0.21735999981562296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,6144,128,0,0.14444266756375632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,6144,4,0,1.974847952524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,6144,1,0,8.936501185099283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,6144,2,0,3.8841225306193032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,6144,8,0,1.020645300547282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,6144,16,0,0.5482240120569865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,6144,32,0,0.31003199021021527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,6144,64,0,0.18997865915298462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,6144,128,0,0.1263200044631958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,6144,1,0,7.7309010823567705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,8,8192,4,0,3.7049280802408853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,8,8192,8,0,1.8440052668253581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,8,8192,32,0,0.5557173490524292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,8,8192,16,0,0.9773973623911539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,8,8192,64,0,0.32519465684890747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,8,8192,128,0,0.22188266118367514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,8,8192,2,0,7.553338368733724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,8,8192,4,0,3.090928077697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,8,8192,8,0,1.5797120730082195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,8,8192,1,0,14.976842244466146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,8,8192,2,0,6.165290832519531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,8,8192,16,0,0.8347253004709879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,8,8192,64,0,0.27802133560180664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,8,8192,32,0,0.4612319866816203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,8,8192,128,0,0.17839999993642172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,1,1,0,0.05223466455936432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,1,2,0,0.03294933338960012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,1,4,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,1,8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,1,16,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,1,32,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,1,64,0,0.01621866722901662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,1,128,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,8,8192,1,0,12.329519907633463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,1,1,0,0.05194133520126343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,1,4,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,1,2,0,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,1,16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,1,8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,1,32,0,0.017701332767804463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,1,64,0,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,1,128,0,0.017674667139848072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,16,1,0,0.05341866612434387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,16,2,0,0.033557333052158356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,16,4,0,0.023743999501069386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,16,8,0,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,16,16,0,0.015685333559910457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,16,32,0,0.01579733317097028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,16,64,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,16,128,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,16,1,0,0.05692266424496969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,16,4,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,16,2,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,16,8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,16,16,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,16,32,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,16,64,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,16,128,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,32,1,0,0.06053866446018219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,32,2,0,0.03583999971548716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,32,4,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,32,8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,32,16,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,32,32,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,32,64,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,32,128,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,32,1,0,0.07693333427111308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,32,2,0,0.04147200038035711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,32,8,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,32,4,0,0.029824001093705494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,32,16,0,0.021946666141351063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,32,32,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,32,64,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,32,128,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,64,1,0,0.07249600191911061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,64,2,0,0.04373333354791006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,64,4,0,0.028730665644009907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,64,8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,64,16,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,64,32,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,64,64,0,0.015685333559910457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,64,128,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,64,1,0,0.10762666662534077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,64,2,0,0.062165334820747375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,64,4,0,0.033957332372665405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,64,16,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,64,8,0,0.025802666942278545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,64,32,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,64,64,0,0.02035733312368393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,64,128,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,128,1,0,0.09794132908185323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,128,2,0,0.05686399837334951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,128,4,0,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,128,8,0,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,128,16,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,128,32,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,128,64,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,128,128,0,0.01629866659641266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,128,1,0,0.16107733050982156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,128,2,0,0.09165333708127339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,128,4,0,0.05644266804059347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,128,16,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,128,8,0,0.030981334547201794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,128,32,0,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,128,64,0,0.022111999491850536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,128,128,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,256,2,0,0.09749333063761394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,256,1,0,0.17840532461802164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,256,4,0,0.05867200096448263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,256,8,0,0.03198933353026708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,256,16,0,0.023728000621000927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,256,32,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,256,64,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,256,128,0,0.019839999576409657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,256,2,0,0.1543839971224467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,256,1,0,0.28755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,256,4,0,0.08962666988372803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,256,8,0,0.05505066613356272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,256,16,0,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,256,32,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,256,128,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,256,64,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,512,2,0,0.20004266500473022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,512,1,0,0.37568533420562744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,512,4,0,0.11176000038782756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,512,8,0,0.06706133484840393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,512,16,0,0.04324266811211904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,512,32,0,0.026778665681680042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,512,64,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,512,128,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,512,2,0,0.3059413234392802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,512,1,0,0.5814933379491171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,512,4,0,0.1674826741218567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,512,8,0,0.09578133622805278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,512,16,0,0.06141333281993866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,512,32,0,0.03357866654793421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,512,64,0,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,512,128,0,0.028016000986099243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,1024,8,0,0.1425973375638326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,1024,4,0,0.2561279932657878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,1024,2,0,0.4774346748987834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,1024,1,0,0.9228800137837728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,1024,16,0,0.08739733695983887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,1024,32,0,0.058970664938290916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,1024,64,0,0.037317333122094475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,1024,128,0,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,1024,8,0,0.1895893414815267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,1024,4,0,0.3457813262939453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,1024,2,0,0.661296010017395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,1024,1,0,1.2912267049153645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,1024,16,0,0.11129066348075867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,1024,32,0,0.07170133292675018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,1024,64,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,1024,128,0,0.03623466690381368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,1536,4,0,0.4461546738942464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,1536,2,0,0.8538453578948975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,1536,1,0,1.6805334091186523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,1536,16,0,0.14217066764831543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,1536,8,0,0.24368000030517578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,1536,32,0,0.0904960036277771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,1536,64,0,0.06451733410358429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,1536,128,0,0.04806933303674062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,1536,4,0,0.5632160107294718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,1536,2,0,1.0840213298797607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,1536,1,0,2.143733342488607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,1536,8,0,0.3022453387578328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,1536,16,0,0.170522669951121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,1536,32,0,0.10609599947929382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,1536,64,0,0.07096000015735626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,1536,128,0,0.0468746672074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,2048,4,0,0.6861920356750488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,2048,2,0,1.3241120179494221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,2048,1,0,2.638906637827555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,2048,8,0,0.36859198411305744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,2048,16,0,0.20850666364034018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,2048,32,0,0.12758400042851767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,2048,64,0,0.08898666501045227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,2048,128,0,0.06458666423956554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,2048,4,0,0.8077226479848226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,2048,2,0,1.5712000528971355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,2048,8,0,0.42455466588338214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,2048,16,0,0.23425066471099854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,2048,1,0,3.1061652501424155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,2048,32,0,0.1384053329626719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,2048,64,0,0.09150399764378865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,2048,128,0,0.06002133091290792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,3072,8,0,0.6837493578592936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,3072,4,0,1.2980053424835205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,3072,16,0,0.3721173206965129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,3072,2,0,2.5742293993631997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,3072,32,0,0.2169653375943502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,3072,64,0,0.1414240002632141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,3072,128,0,0.10335999727249146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,3072,1,0,5.238970756530762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,3072,8,0,0.7248960336049398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,3072,4,0,1.4006187121073406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,3072,2,0,2.7558132807413735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,3072,32,0,0.22377600272496542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,3072,16,0,0.39183998107910156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,3072,64,0,0.14131733775138855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,3072,128,0,0.0960586667060852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,3072,1,0,5.464058558146159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,16,4096,8,0,1.086575984954834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,16,4096,4,0,2.150170644124349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,16,4096,16,0,0.5806346734364828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,16,4096,32,0,0.34759998321533203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,16,4096,2,0,4.353045463562012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,16,4096,64,0,0.20573866367340088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,16,4096,128,0,0.14281599720319113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,16,4096,1,0,8.653386433919271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,16,4096,4,0,2.13592529296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,16,4096,2,0,4.224831899007161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,16,4096,16,0,0.5785760084788004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,16,4096,8,0,1.0934666792551677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,16,4096,64,0,0.1932906707127889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,16,4096,32,0,0.3206826647122701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,16,4096,128,0,0.12983466188112894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,1,2,0,0.04982399940490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,1,1,0,0.08886933326721191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,1,4,0,0.033301333586374916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,16,4096,1,0,8.438992182413736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,1,16,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,1,8,0,0.02421333392461141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,1,32,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,1,128,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,1,1,0,0.08738666772842407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,1,64,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,1,2,0,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,1,4,0,0.033717334270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,1,16,0,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,1,8,0,0.027642667293548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,1,32,0,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,1,64,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,1,128,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,16,2,0,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,16,1,0,0.09500799576441447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,16,4,0,0.03377600014209747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,16,8,0,0.023818666736284893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,16,32,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,16,16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,16,64,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,16,128,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,16,1,0,0.10769066214561462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,16,2,0,0.058245331048965454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,16,4,0,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,16,8,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,16,16,0,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,16,32,0,0.021903999149799347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,16,64,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,16,128,0,0.019839999576409657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,32,1,0,0.10376532872517903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,32,2,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,32,8,0,0.025941332181294758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,32,4,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,32,16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,32,32,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,32,64,0,0.01621333385507266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,32,128,0,0.015861333658297855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,32,1,0,0.1346666713555654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,32,2,0,0.07704000174999237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,32,4,0,0.04102933406829834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,32,8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,32,16,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,32,32,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,32,64,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,32,128,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,64,1,0,0.12625599900881448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,64,2,0,0.07052800059318542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,64,4,0,0.043151999513308205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,64,8,0,0.028538666665554047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,64,16,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,64,32,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,64,64,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,64,128,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,64,1,0,0.19176000356674194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,64,2,0,0.10602133472760518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,64,4,0,0.06242666641871134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,64,8,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,64,16,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,64,32,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,64,64,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,64,128,0,0.019717333217461903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,128,1,0,0.17839467525482178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,128,2,0,0.09729599952697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,128,4,0,0.056277334690093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,128,16,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,128,8,0,0.03624533365170161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,128,32,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,128,64,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,128,128,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,128,1,0,0.29847999413808185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,128,4,0,0.09201066692670186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,128,2,0,0.164000004529953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,128,8,0,0.05669866502285004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,128,16,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,128,32,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,128,64,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,128,128,0,0.02258133391539256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,256,1,0,0.34193066755930585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,256,2,0,0.17901867628097534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,256,4,0,0.09921066959698994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,256,8,0,0.06097066899140676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,256,32,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,256,16,0,0.036271999279658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,256,64,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,256,128,0,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,256,1,0,0.5497386852900187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,256,2,0,0.2874613404273987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,256,8,0,0.09041600426038106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,256,4,0,0.1563040018081665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,256,16,0,0.05658666789531708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,256,64,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,256,128,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,256,32,0,0.029930666089057922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,512,8,0,0.11340266466140747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,512,4,0,0.2020906607309977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,512,2,0,0.38473065694173175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,512,1,0,0.7280480066935221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,512,16,0,0.0711946686108907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,512,32,0,0.048138668139775596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,512,64,0,0.030159999926884968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,512,128,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,512,8,0,0.16946667432785034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,512,4,0,0.3078346649805705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,512,2,0,0.5858293374379476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,512,1,0,1.1404799620310466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,512,16,0,0.09915733337402344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,512,32,0,0.06446933249632518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,512,64,0,0.038634667793909706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,512,128,0,0.03217600037654241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,1024,8,0,0.26065067450205487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,1024,4,0,0.48187732696533203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,1024,2,0,0.9293440183003744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,1024,1,0,1.8294986089070637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,1024,16,0,0.15102932850519815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,1024,32,0,0.09492266178131104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,1024,64,0,0.06713066498438518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,1024,128,0,0.04795200129350027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,1024,4,0,0.6666133403778076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,1024,8,0,0.35129066308339435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,1024,2,0,1.2953706582387288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,1024,16,0,0.19569067160288492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,1024,32,0,0.11629866560300191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,1024,1,0,2.558272043863932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,1024,64,0,0.07732800145943959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,1024,128,0,0.0525493323802948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,1536,4,0,0.8580266634623209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,1536,8,0,0.45556267102559406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,1536,2,0,1.6733226776123047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,1536,32,0,0.15203733245531717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,1536,16,0,0.25220266977945965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,1536,1,0,3.339013417561849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,1536,64,0,0.1034399966398875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,1536,128,0,0.07608533402283986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,1536,4,0,1.097765366236369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,1536,2,0,2.1577226320902505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,1536,8,0,0.5708426634470621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,1536,16,0,0.31029866139094037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,1536,32,0,0.17992534240086874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,1536,1,0,4.269813219706218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,1536,64,0,0.11581866939862569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,1536,128,0,0.08075200021266937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,32,2048,4,0,1.334895928700765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,32,2048,2,0,2.6550347010294595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,32,2048,8,0,0.6987946828206381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,32,2048,16,0,0.3930026690165202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,32,2048,64,0,0.14245866735776266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,32,2048,32,0,0.22021865844726562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,32,2048,1,0,5.314736048380534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,32,2048,128,0,0.10174399614334106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,32,2048,8,0,0.8197600046793619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,32,2048,4,0,1.5899945894877117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,32,2048,16,0,0.43724266688028973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,32,2048,2,0,3.137850761413574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,32,2048,32,0,0.2464266618092855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,32,2048,64,0,0.15056000153223673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,1,1,0,0.1648319959640503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,32,2048,128,0,0.10316800077756245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,1,2,0,0.08900266885757446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,1,4,0,0.0517493337392807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,32,2048,1,0,6.24946657816569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,1,16,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,1,8,0,0.03306666761636734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,1,32,0,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,1,128,0,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,1,64,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,1,1,0,0.15711999932924905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,1,2,0,0.0888159970442454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,1,4,0,0.05412800113360087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,1,32,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,1,16,0,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,1,8,0,0.03612266729275385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,1,64,0,0.021925332645575207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,1,128,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,16,1,0,0.17274133364359537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,16,2,0,0.09452266494433086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,16,4,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,16,8,0,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,16,16,0,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,16,32,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,16,64,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,16,128,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,16,1,0,0.19614400466283163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,16,4,0,0.05659199754397074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,16,2,0,0.10966400305430095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,16,8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,16,16,0,0.030037333567937214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,16,32,0,0.023738667368888855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,16,64,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,16,128,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,32,1,0,0.19328532616297403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,32,2,0,0.10299733281135559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,32,4,0,0.06127466758092245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,32,8,0,0.03614933292071024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,32,16,0,0.025818665822347004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,32,32,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,32,64,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,32,128,0,0.015840000162522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,32,1,0,0.24583999315897623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,32,2,0,0.13395200173060098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,32,4,0,0.07690666615962982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,32,8,0,0.04130133241415024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,32,16,0,0.029968000948429108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,32,64,0,0.021642667551835377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,32,32,0,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,32,128,0,0.020106667031844456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,64,1,0,0.2408426602681478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,64,2,0,0.1272213359673818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,64,4,0,0.0726506660381953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,64,8,0,0.044026667873064675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,64,16,0,0.029813334345817566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,64,64,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,64,32,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,64,128,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,64,1,0,0.36293331782023114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,64,2,0,0.19195199012756348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,64,4,0,0.10761599739392598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,64,8,0,0.06309866905212402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,64,16,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,64,32,0,0.025786665578683216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,64,64,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,64,128,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,128,1,0,0.3404906590779622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,128,2,0,0.17897067467371622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,128,4,0,0.10040000081062317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,128,8,0,0.05888533095518748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,128,16,0,0.03819733361403147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,128,32,0,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,128,64,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,128,128,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,128,1,0,0.5793919960657755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,128,4,0,0.16358932852745056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,128,2,0,0.30294932921727497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,128,8,0,0.09406933188438416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,128,16,0,0.05902933577696482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,128,64,0,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,128,32,0,0.032672000428040825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,128,128,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,256,2,0,0.3439679940541585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,256,1,0,0.6629546483357748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,256,4,0,0.18381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,256,8,0,0.10332799951235454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,256,16,0,0.06425599753856659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,256,32,0,0.042223999897638954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,256,64,0,0.026549334327379864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,256,128,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,256,1,0,1.0800586541493733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,256,2,0,0.5518773396809896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,256,4,0,0.29155733187993366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,256,16,0,0.09434133768081665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,256,32,0,0.06012799839178721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,256,8,0,0.1588320036729177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,256,64,0,0.03383466601371765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,256,128,0,0.029989334444204967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,512,4,0,0.38308799266815186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,512,1,0,1.4321920077006023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,512,2,0,0.7333386739095052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,512,8,0,0.20778665939966837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,512,16,0,0.12212266524632771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,512,32,0,0.07915733257929485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,512,64,0,0.05638933181762695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,512,128,0,0.04204800228277842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,512,4,0,0.5899306535720825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,512,2,0,1.1490026315053303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,512,1,0,2.2643465995788574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,512,8,0,0.311514675617218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,512,16,0,0.175327996412913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,512,32,0,0.10539733370145161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,512,64,0,0.07052266597747803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,512,128,0,0.047466665506362915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,64,1024,8,0,0.492789347966512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,64,1024,4,0,0.9340533415476481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,64,1024,2,0,1.8392106691996257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,64,1024,16,0,0.2734346588452657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,64,1024,32,0,0.1633386711279551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,64,1024,128,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,64,1024,64,0,0.11004799604415894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,64,1024,1,0,3.6752427419026694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,64,1024,8,0,0.6800746917724609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,64,1024,4,0,1.3138079643249512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,64,1024,16,0,0.3655093510945638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,64,1024,32,0,0.20678400993347168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,64,1024,2,0,2.5960853894551597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,64,1024,64,0,0.12829867005348206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,64,1024,128,0,0.08874666690826416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,64,1024,1,0,5.15714677174886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,1,1,0,0.31755733489990234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,1,2,0,0.1648960014184316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,1,4,0,0.08953600128491719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,1,8,0,0.052426666021347046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,1,16,0,0.03182400017976761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,1,32,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,1,64,0,0.02035733312368393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,1,128,0,0.015840000162522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,1,1,0,0.298527995745341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,1,2,0,0.15958933035532633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,1,4,0,0.08898666501045227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,1,8,0,0.054058666030565895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,1,16,0,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,1,32,0,0.02882666637500127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,1,64,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,1,128,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,16,1,0,0.33371734619140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,16,2,0,0.1745013395945231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,16,4,0,0.09405333797136943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,16,8,0,0.05273066461086273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,16,16,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,16,32,0,0.025663999219735462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,16,64,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,16,128,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,16,1,0,0.37668800354003906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,16,2,0,0.19805866479873657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,16,4,0,0.10899733503659566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,16,8,0,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,16,16,0,0.03826133410135905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,16,32,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,16,128,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,16,64,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,32,1,0,0.3744800090789795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,32,2,0,0.19407467047373453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,32,4,0,0.10467732946077983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,32,8,0,0.061146666606267296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,32,16,0,0.0360959991812706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,32,32,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,32,64,0,0.019946667055288952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,32,128,0,0.018186666071414948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,32,1,0,0.47355198860168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,32,2,0,0.24849067131678262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,32,8,0,0.07820799946784973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,32,4,0,0.13236799836158752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,32,32,0,0.03181866556406021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,32,16,0,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,32,128,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,32,64,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,64,2,0,0.24048000574111938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,64,1,0,0.46888534228007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,64,4,0,0.1285813351472219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,64,8,0,0.07499200105667114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,64,16,0,0.04580266773700714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,64,32,0,0.030415999392668407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,64,64,0,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,64,128,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,64,1,0,0.7058453559875488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,64,2,0,0.36535998185475665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,64,4,0,0.1965706745783488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,64,16,0,0.06740266581376393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,64,8,0,0.10915199915568034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,64,32,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,64,64,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,64,128,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,128,1,0,0.6623573303222656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,128,4,0,0.18116267522176108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,128,2,0,0.34233065446217853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,128,8,0,0.10342400272687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,128,16,0,0.06381333371003468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,128,32,0,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,128,64,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,128,128,0,0.022133332987626392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,128,2,0,0.5833760102589926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,128,1,0,1.1401653289794922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,128,4,0,0.30502933263778687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,128,8,0,0.16700265804926553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,128,16,0,0.09594133496284485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,128,32,0,0.0621973325808843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,128,64,0,0.03817066550254822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,128,128,0,0.028277332584063213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,256,8,0,0.1871253252029419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,256,4,0,0.3505920171737671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,256,2,0,0.6621599992116293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,256,1,0,1.3023200035095215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,256,16,0,0.11147200067838033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,256,32,0,0.07236266632874806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,256,64,0,0.05063466727733612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,256,128,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,256,8,0,0.29737599690755206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,256,4,0,0.5588906606038412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,256,2,0,1.083722670873006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,256,1,0,2.142362594604492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,256,16,0,0.16554666558901468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,256,32,0,0.10083199540774028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,256,64,0,0.06699199974536896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,256,128,0,0.04613866905371348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,128,512,4,0,0.7444907029469808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,128,512,8,0,0.39345065752665204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,128,512,2,0,1.4478453000386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,128,512,16,0,0.22304532925287882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,128,512,32,0,0.1362826625506083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,128,512,64,0,0.09338133533795674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,128,512,1,0,2.8551785151163735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,128,512,128,0,0.07275733351707458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,128,512,8,0,0.6057013273239136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,128,512,4,0,1.1706773440043132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,128,512,2,0,2.3012000719706216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,128,512,16,0,0.32737066348393756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,128,512,32,0,0.18796799580256143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,128,512,128,0,0.08387200037638347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,128,512,64,0,0.11685333649317424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,128,512,1,0,4.576346715291341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,1,1,0,0.6237653493881226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,1,2,0,0.31908265749613446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,1,4,0,0.16745599110921225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,1,16,0,0.054229333996772766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,1,8,0,0.09168000022570293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,1,32,0,0.034186666210492454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,1,64,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,1,128,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,1,1,0,0.5797173182169596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,1,2,0,0.29892265796661377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,1,8,0,0.09431466460227966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,1,4,0,0.16107733050982156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,1,16,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,1,32,0,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,1,64,0,0.027914665639400482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,1,128,0,0.02293333411216736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,16,2,0,0.3315733273824056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,16,1,0,0.6474026838938395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,16,8,0,0.095360000928243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,16,4,0,0.17426133155822754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,16,16,0,0.05422399938106537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,16,32,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,16,64,0,0.026704000929991405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,16,128,0,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,16,2,0,0.37455999851226807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,16,1,0,0.721829334894816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,16,4,0,0.1992266575495402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,16,8,0,0.11190399527549744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,16,16,0,0.06081599990526835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,16,32,0,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,16,64,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,16,128,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,32,1,0,0.7343839804331461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,32,8,0,0.10685867071151733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,32,2,0,0.3744906584421794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,32,4,0,0.19640533129374185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,32,16,0,0.06252266466617584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,32,32,0,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,32,64,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,32,128,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,32,2,0,0.4732373158137004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,32,1,0,0.9245173136393229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,32,4,0,0.2502293388048808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,32,8,0,0.1361120045185089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,32,16,0,0.08107199768225352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,32,32,0,0.04385066529115041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,32,64,0,0.032672000428040825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,32,128,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,64,2,0,0.4676640033721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,64,1,0,0.9187413056691488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,64,4,0,0.24428266286849976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,64,8,0,0.1320693294207255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,64,16,0,0.07838400204976399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,64,32,0,0.050240000089009605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,64,64,0,0.034287999073664345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,64,128,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,64,4,0,0.36800531546274823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,64,2,0,0.7080586751302084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,64,1,0,1.3953812917073567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,64,8,0,0.19823465744654337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,64,16,0,0.11235733826955159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,64,32,0,0.06896533568700154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,64,64,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,64,128,0,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,128,8,0,0.18926932414372763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,128,2,0,0.6679840087890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,128,4,0,0.3463679949442546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,128,1,0,1.2976853052775066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,128,32,0,0.07114666700363159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,128,16,0,0.11223999659220378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,128,64,0,0.05096533397833506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,128,128,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,128,4,0,0.5881866614023844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,128,8,0,0.31014400720596313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,128,2,0,1.1453973452250164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,128,16,0,0.17307200034459433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,128,32,0,0.10319466392199199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,128,1,0,2.2600000699361167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,128,64,0,0.06920533378918965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,128,128,0,0.047744000951449074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,32,256,256,4,0,0.6786773204803467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,16,256,256,8,0,0.3621600071589152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,64,256,256,2,0,1.3180906772613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,8,256,256,16,0,0.2044853369394938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,4,256,256,32,0,0.12483732899030049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,128,256,256,1,0,2.5905067125956216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,2,256,256,64,0,0.0876533289750417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,float16,1,256,256,128,0,0.06673066814740498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,32,256,256,4,0,1.1100320021311443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,16,256,256,8,0,0.575493335723877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,64,256,256,2,0,2.1739625930786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,8,256,256,16,0,0.31252266963322956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,4,256,256,32,0,0.1779680053393046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,2,256,256,64,0,0.11331199606259663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,1,256,256,128,0,0.07888533174991608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,mla_context,default,float16,fp8,128,256,256,1,0,4.32146676381429
