framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,float16,0,4.884592056274414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,128,1,float16,fp8,0,4.919877370198567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,float16,0,25.47577667236328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,float16,0,4.945370674133301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,0,1,fp8,fp8,0,21.923433939615887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,128,1,float16,fp8,0,4.983104070027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,128,1,fp8,fp8,0,4.563189188639323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,float16,0,25.554667154947918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,128,1,fp8,fp8,0,4.491306622823079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,float16,0,4.976917266845703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,0,1,float16,fp8,0,25.608731587727863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,128,1,float16,fp8,0,5.018346786499023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,64,0,1,fp8,fp8,0,22.02686309814453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,128,1,fp8,fp8,0,4.598293304443359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,float16,0,25.61773427327474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,float16,0,2.875493367513021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,0,1,fp8,fp8,0,22.05188242594401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,128,1,float16,fp8,0,2.947728157043457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,128,1,fp8,fp8,0,2.756741205851237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,64,0,1,float16,fp8,0,25.676793416341145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,float16,0,13.34566879272461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,float16,0,2.548288027445475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,0,1,fp8,fp8,0,11.55737559000651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,64,0,1,float16,fp8,0,13.432453155517578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,128,1,float16,fp8,0,2.573018709818522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,128,1,fp8,fp8,0,2.347258726755778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,float16,0,12.877557118733725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,float16,0,2.55567995707194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,0,1,fp8,fp8,0,11.099488576253256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,64,0,1,float16,fp8,0,12.890271504720053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,128,1,float16,fp8,0,2.5821545918782554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,128,1,fp8,fp8,0,2.3610612551371255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,float16,0,12.900357564290365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,float16,0,2.5703733762105307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,64,0,1,float16,fp8,0,25.449081420898438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,128,1,float16,fp8,0,2.598618666330973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,0,1,fp8,fp8,0,11.124378204345703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,64,0,1,float16,fp8,0,12.929818471272787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,128,1,fp8,fp8,0,2.377669334411621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,float16,0,1.5717172622680664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,float16,0,12.938847859700521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,0,1,fp8,fp8,0,11.136229197184244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,float16,0,6.852293650309245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,64,0,1,float16,fp8,0,12.958746592203775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,128,1,float16,fp8,0,1.6160267194112141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,128,1,fp8,fp8,0,1.5244746208190918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,float16,0,1.4212533632914226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,128,1,float16,fp8,0,1.4303040504455566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,128,1,fp8,fp8,0,1.3219412962595622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,0,1,fp8,fp8,0,5.962133407592773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,64,0,1,float16,fp8,0,6.878917058308919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,float16,0,1.4226773579915364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,float16,0,6.614005406697591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,0,1,fp8,fp8,0,5.7379201253255205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,64,0,1,float16,fp8,0,6.61622937520345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,128,1,float16,fp8,0,1.4357813199361165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,128,1,fp8,fp8,0,1.3271839618682861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,float16,0,6.616325378417969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,float16,0,1.4312480290730794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,128,1,float16,fp8,0,1.4453387260437012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,0,1,fp8,fp8,0,5.740079879760742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,128,1,fp8,fp8,0,1.337114651997884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,64,0,1,float16,fp8,0,6.6486562093098955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,float16,0,1.0880266825358074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,float16,0,6.636970520019531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,128,1,float16,fp8,0,1.094048023223877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,128,1,fp8,fp8,0,1.0282506942749023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,0,1,fp8,fp8,0,5.764586766560872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,float16,0,3.7610880533854165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,64,0,1,float16,fp8,0,6.672314961751302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,float16,0,1.0872480074564617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,128,1,float16,fp8,0,1.087178627649943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,0,1,fp8,fp8,0,3.279690742492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,64,0,1,float16,fp8,0,3.7601706186930337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,128,1,fp8,fp8,0,1.021130641301473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,float16,0,3.741077423095703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,float16,0,1.0950346787770588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,128,1,float16,fp8,0,1.0879519780476887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,0,1,fp8,fp8,0,3.2719573974609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,64,0,1,float16,fp8,0,3.719418525695801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,128,1,fp8,fp8,0,1.0274453163146973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,float16,0,3.734623908996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,float16,0,1.0938879648844402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,0,1,fp8,fp8,0,3.2698240280151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,64,0,1,float16,fp8,0,3.7262614568074546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,128,1,float16,fp8,0,1.0952320098876953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,128,1,fp8,fp8,0,1.0264800389607747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,float16,0,3.7317174275716147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,0,1,fp8,fp8,0,3.2604106267293296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,64,0,1,float16,fp8,0,3.7304906845092773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,float16,0,3.641968091328939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,128,1,float16,fp8,0,3.6717761357625327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,128,1,fp8,fp8,0,3.3322668075561523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,float16,0,3.6600586573282876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,float16,0,15.114315032958984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,0,1,float16,fp8,0,15.138490041097006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,64,0,1,fp8,fp8,0,13.046112060546875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,128,1,float16,fp8,0,3.6919733683268228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,128,1,fp8,fp8,0,3.368650754292806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,float16,0,15.129690806070963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,float16,0,3.678645451863607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,0,1,fp8,fp8,0,13.089834849039713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,64,0,1,float16,fp8,0,15.190996805826822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,128,1,fp8,fp8,0,3.396922747294108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,128,1,float16,fp8,0,3.7169920603434243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,float16,0,2.1549173990885415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,float16,0,15.202613830566406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,128,1,float16,fp8,0,2.2105493545532227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,128,1,fp8,fp8,0,2.0646613438924155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,float16,0,8.020138422648111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,0,1,fp8,fp8,0,13.076107025146484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,64,0,1,float16,fp8,0,15.24395751953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,float16,0,1.9126613934834797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,128,1,float16,fp8,0,1.931114673614502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,0,1,fp8,fp8,0,6.98419189453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,64,0,1,float16,fp8,0,8.070522944132486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,128,1,fp8,fp8,0,1.7637440363566081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,float16,0,7.669477462768555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,float16,0,1.9215946197509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,128,1,float16,fp8,0,1.9404266675313313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,0,1,fp8,fp8,0,6.6468855539957685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,64,0,1,float16,fp8,0,7.696565628051758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,128,1,fp8,fp8,0,1.7761492729187012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,float16,0,1.929258664449056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,float16,0,7.70692253112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,128,1,float16,fp8,0,1.95032533009847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,0,1,fp8,fp8,0,6.662698745727539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,64,0,1,float16,fp8,0,7.7105757395426435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,128,1,fp8,fp8,0,1.7878986994425456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,float16,0,1.1831733385721843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,float16,0,7.7073014577229815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,128,1,float16,fp8,0,1.215008020401001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,128,1,fp8,fp8,0,1.1468053658803303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,float16,0,4.145669301350911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,0,1,fp8,fp8,0,6.682655970255534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,64,0,1,float16,fp8,0,7.746362686157227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,float16,0,1.0676906903584797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,128,1,float16,fp8,0,1.0779786904652913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,0,1,fp8,fp8,0,3.629551887512207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,64,0,1,float16,fp8,0,4.170485178629558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,128,1,fp8,fp8,0,0.9952106475830078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,float16,0,3.9816853205362954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,float16,0,1.0736746788024902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,128,1,float16,fp8,0,1.083626667658488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,0,1,fp8,fp8,0,3.466933250427246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,64,0,1,float16,fp8,0,3.987173398335775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,128,1,fp8,fp8,0,1.001039981842041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,float16,0,4.000912030537923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,float16,0,1.0783627033233643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,128,1,float16,fp8,0,1.0900320212046306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,0,1,fp8,fp8,0,3.468111991882324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,64,0,1,float16,fp8,0,3.9919093449910483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,128,1,fp8,fp8,0,1.0088640054066975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,float16,0,4.003936131795247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,float16,0,0.8242879708607992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,128,1,float16,fp8,0,0.8249279657999674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,0,1,fp8,fp8,0,3.4760961532592773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,128,1,fp8,fp8,0,0.7755359808603922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,64,0,1,float16,fp8,0,4.0122025807698565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,float16,0,2.3384052912394204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,float16,0,0.8222933610280355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,128,1,float16,fp8,0,0.8221813042958578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,0,1,fp8,fp8,0,2.0460586547851562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,64,0,1,float16,fp8,0,2.33078400293986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,float16,0,2.3089119593302407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,128,1,fp8,fp8,0,0.7712693214416504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,float16,0,0.8249546686808268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,0,1,fp8,fp8,0,2.039343992869059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,64,0,1,float16,fp8,0,2.309114615122477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,128,1,float16,fp8,0,0.8241173426310221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,128,1,fp8,fp8,0,0.7768479983011881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,float16,0,2.3041866620381675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,float16,0,0.8250453472137451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,0,1,float16,fp8,0,2.312826633453369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,64,0,1,fp8,fp8,0,2.0374666849772134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,128,1,float16,fp8,0,0.8244586785634359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,128,1,fp8,fp8,0,0.7770933310190836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,float16,0,2.3086986541748047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,0,1,float16,fp8,0,2.3149919509887695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,64,0,1,fp8,fp8,0,2.040170669555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,float16,0,3.028479894002279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,128,1,float16,fp8,0,3.0556106567382812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,128,1,fp8,fp8,0,2.7705440521240234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,float16,0,3.0394665400187173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,float16,0,10.934944152832031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,0,1,float16,fp8,0,10.9573974609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,128,1,float16,fp8,0,3.066965421040853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,64,0,1,fp8,fp8,0,9.46225611368815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,128,1,fp8,fp8,0,2.7916107177734375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,float16,0,10.953882853190104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,float16,0,3.054896036783854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,0,1,fp8,fp8,0,9.485733032226562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,128,1,float16,fp8,0,3.086847941080729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,64,0,1,float16,fp8,0,10.98850123087565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,128,1,fp8,fp8,0,2.8159519831339517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,float16,0,1.7948533693949382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,float16,0,10.97861353556315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,128,1,float16,fp8,0,1.8412319819132488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,128,1,fp8,fp8,0,1.7245920499165852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,0,1,fp8,fp8,0,9.50937589009603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,float16,0,5.854298909505208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,64,0,1,float16,fp8,0,11.042767842610678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,float16,0,1.597669283548991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,0,1,fp8,fp8,0,5.1293331782023115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,64,0,1,float16,fp8,0,5.892266591389974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,128,1,fp8,fp8,0,1.4733440081278484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,128,1,float16,fp8,0,1.6102399826049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,float16,0,5.567573547363281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,float16,0,1.6049973169962566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,128,1,float16,fp8,0,1.6217546463012695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,0,1,fp8,fp8,0,4.84005864461263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,64,0,1,float16,fp8,0,5.577269236246745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,128,1,fp8,fp8,0,1.4831892649332683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,float16,0,5.592016220092773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,float16,0,1.6141546567281086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,128,1,float16,fp8,0,1.6311786969502766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,0,1,fp8,fp8,0,4.845466613769531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,64,0,1,float16,fp8,0,5.60646375020345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,128,1,fp8,fp8,0,1.4956159591674805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,float16,0,5.615610758463542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,float16,0,0.9887733459472656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,128,1,float16,fp8,0,1.0170559883117676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,float16,0,3.0439840952555337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,128,1,fp8,fp8,0,0.9614453315734863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,0,1,fp8,fp8,0,4.865045229593913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,64,0,1,float16,fp8,0,5.614565531412761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,float16,0,0.8935946623484293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,128,1,float16,fp8,0,0.9015733400980631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,0,1,fp8,fp8,0,2.6802666982014975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,64,0,1,float16,fp8,0,3.0714667638142905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,128,1,fp8,fp8,0,0.8319466908772787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,float16,0,2.9002561569213867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,float16,0,0.8964107036590576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,0,1,fp8,fp8,0,2.543253262837728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,64,0,1,float16,fp8,0,2.9089225133260093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,128,1,float16,fp8,0,0.9062240123748779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,128,1,fp8,fp8,0,0.8362239996592203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,float16,0,2.9086294174194336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,float16,0,0.9018346468607584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,0,1,fp8,fp8,0,2.5471572875976562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,64,0,1,float16,fp8,0,2.917680104573568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,128,1,float16,fp8,0,0.9115893046061198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,128,1,fp8,fp8,0,0.844480037689209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,float16,0,2.9195839564005532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,float16,0,0.6900479793548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,128,1,float16,fp8,0,0.6894773642222086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,0,1,fp8,fp8,0,2.5517759323120117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,float16,0,1.7386239369710286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,64,0,1,float16,fp8,0,2.9278720219930015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,128,1,fp8,fp8,0,0.6489333311716715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,float16,0,0.6880693435668945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,0,1,float16,fp8,0,1.7450025876363118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,64,0,1,fp8,fp8,0,1.5324640274047852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,128,1,float16,fp8,0,0.6883947054545084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,float16,0,1.7202293078104656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,128,1,fp8,fp8,0,0.6463733514149984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,float16,0,0.6909706592559814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,0,1,fp8,fp8,0,1.5187679926554363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,64,0,1,float16,fp8,0,1.716202735900879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,128,1,float16,fp8,0,0.6908693313598633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,128,1,fp8,fp8,0,0.6462293465932211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,float16,0,1.7263414065043132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,float16,0,0.6908906300862631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,0,1,float16,fp8,0,1.7261120478312175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,64,0,1,fp8,fp8,0,1.5246987342834473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,128,1,float16,fp8,0,0.6913759708404541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,128,1,fp8,fp8,0,0.6500906546910604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,float16,0,1.728218714396159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,0,1,float16,fp8,0,1.7248853047688801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,64,0,1,fp8,fp8,0,1.527882734934489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,float16,0,4.758607864379883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,128,1,fp8,fp8,0,4.355279922485352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,128,1,float16,fp8,0,4.793930689493815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,float16,0,4.812229474385579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,float16,0,14.731012980143229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,0,1,fp8,fp8,0,12.743855794270834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,64,0,1,float16,fp8,0,14.728555043538412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,128,1,float16,fp8,0,4.852874755859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,128,1,fp8,fp8,0,4.431573232014974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,float16,0,14.8024050394694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,float16,0,4.84010664621989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,0,1,fp8,fp8,0,12.843531290690104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,128,1,float16,fp8,0,4.880192120869954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,64,0,1,float16,fp8,0,14.82098134358724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,128,1,fp8,fp8,0,4.463642756144206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,float16,0,2.749786694844564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,float16,0,14.86623509724935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,128,1,float16,fp8,0,2.8144585291544595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,128,1,fp8,fp8,0,2.625050703684489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,0,1,fp8,fp8,0,12.866437276204428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,float16,0,7.866698582967122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,64,0,1,float16,fp8,0,14.903055826822916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,float16,0,2.419098695119222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,128,1,float16,fp8,0,2.441322644551595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,0,1,fp8,fp8,0,6.90667724609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,64,0,1,float16,fp8,0,7.9330399831136065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,128,1,fp8,fp8,0,2.2162453333536782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,float16,0,7.415045420328776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,float16,0,2.4289600054423013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,128,1,float16,fp8,0,2.4529333114624023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,0,1,fp8,fp8,0,6.440757115681966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,64,0,1,float16,fp8,0,7.433616002400716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,float16,0,7.440602620442708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,128,1,fp8,fp8,0,2.2335519790649414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,float16,0,2.4422292709350586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,0,1,float16,fp8,0,7.453973134358724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,64,0,1,fp8,fp8,0,6.4576371510823565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,128,1,float16,fp8,0,2.4699360529581704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,128,1,fp8,fp8,0,2.2486559549967446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,float16,0,1.4370932579040527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,float16,0,7.475168228149414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,128,1,float16,fp8,0,1.4746987024943035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,128,1,fp8,fp8,0,1.382304032643636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,float16,0,4.0108747482299805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,0,1,float16,fp8,0,7.505072275797526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,64,0,1,fp8,fp8,0,6.476085027058919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,float16,0,1.278058687845866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,128,1,float16,fp8,0,1.2912480036417644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,0,1,fp8,fp8,0,3.5417439142862954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,64,0,1,float16,fp8,0,4.056085268656413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,float16,0,3.8023945490519204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,128,1,fp8,fp8,0,1.1824639638264973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,float16,0,1.2862719694773357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,128,1,float16,fp8,0,1.299295981725057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,0,1,fp8,fp8,0,3.3150558471679688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,64,0,1,float16,fp8,0,3.8029492696126304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,float16,0,3.8137439092000327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,128,1,fp8,fp8,0,1.1899147033691406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,float16,0,1.2924853165944417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,0,1,fp8,fp8,0,3.3163839975992837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,64,0,1,float16,fp8,0,3.827103932698568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,128,1,float16,fp8,0,1.3056320349375408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,128,1,fp8,fp8,0,1.198794682820638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,float16,0,3.8316214879353843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,float16,0,0.795087973276774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,128,1,float16,fp8,0,0.8176053365071615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,0,1,fp8,fp8,0,3.326650619506836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,float16,0,2.108112017313639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,128,1,fp8,fp8,0,0.7729653517405192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,64,0,1,float16,fp8,0,3.8417654037475586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,float16,0,0.7195200125376383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,0,1,fp8,fp8,0,1.8724160194396973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,128,1,float16,fp8,0,0.7239946524302164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,64,0,1,float16,fp8,0,2.128506660461426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,float16,0,1.993578592936198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,128,1,fp8,fp8,0,0.6700373490651449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,float16,0,0.7224906285603842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,0,1,fp8,fp8,0,1.751754601796468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,64,0,1,float16,fp8,0,2.000511964162191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,128,1,float16,fp8,0,0.7302239735921224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,float16,0,2.0037867228190103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,128,1,fp8,fp8,0,0.6749173005421957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,float16,0,0.7244160175323486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,0,1,fp8,fp8,0,1.7593493461608887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,64,0,1,float16,fp8,0,2.008629322052002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,128,1,float16,fp8,0,0.73199462890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,128,1,fp8,fp8,0,0.6789920330047607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,float16,0,2.0101653734842935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,float16,0,0.5557226737340292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,float16,0,1.2279626528422039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,0,1,float16,fp8,0,2.0165759722391763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,128,1,float16,fp8,0,0.556384007136027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,64,0,1,fp8,fp8,0,1.7633919715881348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,128,1,fp8,fp8,0,0.5210400025049845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,float16,0,0.5577919880549113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,0,1,fp8,fp8,0,1.091312011082967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,64,0,1,float16,fp8,0,1.2290826638539631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,128,1,float16,fp8,0,0.5584533214569092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,128,1,fp8,fp8,0,0.5233813524246216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,float16,0,1.2119946479797363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,0,1,float16,fp8,0,1.212608019510905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,float16,0,0.5562080144882202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,64,0,1,fp8,fp8,0,1.0834346612294514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,float16,0,1.2138826847076416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,128,1,float16,fp8,0,0.5556159814198812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,128,1,fp8,fp8,0,0.5233440001805624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,float16,0,0.557861328125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,0,1,float16,fp8,0,1.2167306741078694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,64,0,1,fp8,fp8,0,1.0794026851654053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,float16,0,1.219381332397461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,128,1,float16,fp8,0,0.5575679937998453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,128,1,fp8,fp8,0,0.5239786704381307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,0,1,float16,fp8,0,1.216058651606242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,64,0,1,fp8,fp8,0,1.0829439957936604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,float16,0,3.541130701700846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,128,1,fp8,fp8,0,3.2302773793538413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,128,1,float16,fp8,0,3.5681705474853516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,float16,0,3.5559094746907554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,float16,0,9.023898442586264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,0,1,fp8,fp8,0,7.836655934651692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,64,0,1,float16,fp8,0,9.023226420084635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,128,1,float16,fp8,0,3.588458697001139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,128,1,fp8,fp8,0,3.2713279724121094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,float16,0,9.04299227396647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,float16,0,3.5801334381103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,0,1,fp8,fp8,0,7.87506103515625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,64,0,1,float16,fp8,0,9.063599904378256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,128,1,float16,fp8,0,3.614586512247721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,128,1,fp8,fp8,0,3.2984641393025718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,float16,0,9.084202448527018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,float16,0,2.059312025705973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,0,1,float16,fp8,0,9.102949142456055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,64,0,1,fp8,fp8,0,7.913877487182617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,float16,0,4.906336148579915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,128,1,float16,fp8,0,2.1080800692240396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,128,1,fp8,fp8,0,1.9667359987894695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,float16,0,1.816677411397298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,0,1,float16,fp8,0,4.952197392781575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,64,0,1,fp8,fp8,0,4.3342240651448565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,128,1,float16,fp8,0,1.8334293365478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,128,1,fp8,fp8,0,1.6669440269470215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,float16,0,4.556266784667969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,float16,0,1.8249227205912273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,0,1,fp8,fp8,0,3.9832213719685874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,64,0,1,float16,fp8,0,4.572965304056804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,128,1,float16,fp8,0,1.8435200055440266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,float16,0,4.577055931091309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,128,1,fp8,fp8,0,1.6797332763671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,float16,0,1.8343946139017742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,0,1,fp8,fp8,0,3.9949280420939126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,64,0,1,float16,fp8,0,4.594810803731282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,128,1,float16,fp8,0,1.8529119491577148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,float16,0,4.60316785176595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,128,1,fp8,fp8,0,1.6913280487060547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,float16,0,1.0819573402404785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,128,1,float16,fp8,0,1.1105066935221355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,float16,0,2.5201919873555503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,0,1,float16,fp8,0,4.617973327636719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,64,0,1,fp8,fp8,0,4.006319999694824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,128,1,fp8,fp8,0,1.0410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,float16,0,0.9646613597869873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,0,1,float16,fp8,0,2.5467519760131836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,64,0,1,fp8,fp8,0,2.2397173245747886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,128,1,float16,fp8,0,0.9736853440602621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,float16,0,2.354095935821533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,128,1,fp8,fp8,0,0.8921706676483154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,float16,0,0.9695359865824381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,0,1,float16,fp8,0,2.3607306480407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,64,0,1,fp8,fp8,0,2.061786651611328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,128,1,float16,fp8,0,0.9791839917500814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,float16,0,2.366757392883301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,128,1,fp8,fp8,0,0.8967359860738119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,float16,0,0.9742186864217123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,0,1,float16,fp8,0,2.3685173988342285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,64,0,1,fp8,fp8,0,2.072000026702881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,128,1,float16,fp8,0,0.9852533340454102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,float16,0,2.370512008666992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,128,1,fp8,fp8,0,0.9037706851959229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,float16,0,0.5998986562093099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,128,1,float16,fp8,0,0.6170933246612549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,float16,0,1.3328426678975422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,0,1,float16,fp8,0,2.3846774101257324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,64,0,1,fp8,fp8,0,2.077605406443278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,128,1,fp8,fp8,0,0.5847359895706177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,float16,0,0.5437866846720377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,0,1,fp8,fp8,0,1.1976479689280193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,64,0,1,float16,fp8,0,1.3488586743672688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,128,1,float16,fp8,0,0.5497653484344482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,float16,0,1.2536319891611736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,128,1,fp8,fp8,0,0.5086133480072021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,float16,0,0.5458293358484904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,0,1,float16,fp8,0,1.2565759817759197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,64,0,1,fp8,fp8,0,1.1076587041219075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,128,1,float16,fp8,0,0.5516320069630941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,float16,0,1.2585759957631428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,128,1,fp8,fp8,0,0.5125493208567301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,float16,0,0.5500693321228027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,0,1,float16,fp8,0,1.2624800205230713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,64,0,1,fp8,fp8,0,1.1114506721496582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,float16,0,1.2629706859588623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,128,1,fp8,fp8,0,0.5132586558659872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,128,1,float16,fp8,0,0.5552853345870972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,float16,0,0.42264533042907715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,float16,0,0.803663969039917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,0,1,float16,fp8,0,1.2699253559112549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,128,1,float16,fp8,0,0.4228479862213135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,64,0,1,fp8,fp8,0,1.1141013304392497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,128,1,fp8,fp8,0,0.39790932337443036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,float16,0,0.4198880195617676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,0,1,fp8,fp8,0,0.7183252970377604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,64,0,1,float16,fp8,0,0.8043573697408041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,128,1,float16,fp8,0,0.4197760025660197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,float16,0,0.7906453609466553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,128,1,fp8,fp8,0,0.398037314414978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,0,1,float16,fp8,0,0.7915840148925781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,float16,0,0.42291732629140216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,64,0,1,fp8,fp8,0,0.7081440289815267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,float16,0,0.7922826608022054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,128,1,float16,fp8,0,0.4224746624628703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,128,1,fp8,fp8,0,0.3978559970855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,float16,0,0.4233706792195638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,0,1,float16,fp8,0,0.7893973191579183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,64,0,1,fp8,fp8,0,0.7114933331807455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,float16,0,0.7933759689331055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,128,1,fp8,fp8,0,0.3949013153711955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,128,1,float16,fp8,0,0.4229439894358317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,0,1,float16,fp8,0,0.7932586669921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,64,0,1,fp8,fp8,0,0.7097493012746176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,float16,0,4.677280108133952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,128,1,fp8,fp8,0,4.282042821248372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,128,1,float16,fp8,0,4.716517448425293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,float16,0,9.246421178181967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,0,1,fp8,fp8,0,8.124234517415365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,64,0,1,float16,fp8,0,9.280725479125977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,float16,0,4.739930788675944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,128,1,float16,fp8,0,4.773861249287923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,128,1,fp8,fp8,0,4.358991940816243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,float16,0,9.334266662597656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,0,1,float16,fp8,0,9.363311767578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,float16,0,4.764512062072754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,64,0,1,fp8,fp8,0,8.208378473917643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,128,1,fp8,fp8,0,4.389520009358724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,128,1,float16,fp8,0,4.808138529459636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,float16,0,9.382944107055664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,float16,0,2.6807308197021484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,128,1,float16,fp8,0,2.7378934224446616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,0,1,float16,fp8,0,9.433834711710611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,64,0,1,fp8,fp8,0,8.237317403157553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,float16,0,5.141599973042806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,128,1,fp8,fp8,0,2.554906686147054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,float16,0,2.3499840100606284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,0,1,fp8,fp8,0,4.556794802347819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,64,0,1,float16,fp8,0,5.206240018208821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,128,1,float16,fp8,0,2.3708640734354653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,float16,0,4.642767906188965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,128,1,fp8,fp8,0,2.1446399688720703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,float16,0,2.362117290496826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,0,1,fp8,fp8,0,4.077653249104817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,64,0,1,float16,fp8,0,4.669482549031575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,128,1,float16,fp8,0,2.3838292757670083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,128,1,fp8,fp8,0,2.1629014015197754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,float16,0,4.6616106033325195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,float16,0,2.375413258870443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,0,1,float16,fp8,0,4.692511876424153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,64,0,1,fp8,fp8,0,4.096490542093913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,float16,0,4.690224011739095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,128,1,float16,fp8,0,2.4005866050720215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,128,1,fp8,fp8,0,2.1803414026896157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,float16,0,1.375109354654948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,0,1,float16,fp8,0,4.711503982543945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,float16,0,2.616607983907064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,128,1,float16,fp8,0,1.4092159271240234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,64,0,1,fp8,fp8,0,4.1124267578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,128,1,fp8,fp8,0,1.3153973420461018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,float16,0,1.214191993077596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,0,1,fp8,fp8,0,2.328282674153646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,64,0,1,float16,fp8,0,2.6508960723876953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,128,1,float16,fp8,0,1.2262293497721355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,float16,0,2.3707253138224282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,128,1,fp8,fp8,0,1.1158933639526367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,0,1,float16,fp8,0,2.3842933972676597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,float16,0,1.221951961517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,64,0,1,fp8,fp8,0,2.087360064188639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,float16,0,2.3871787389119468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,128,1,fp8,fp8,0,1.1252106825510662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,128,1,float16,fp8,0,1.2326719760894775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,float16,0,1.2288586298624675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,0,1,float16,fp8,0,2.3934826850891113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,64,0,1,fp8,fp8,0,2.100656032562256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,float16,0,2.401466687520345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,128,1,float16,fp8,0,1.2413012981414795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,128,1,fp8,fp8,0,1.1327786445617676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,float16,0,0.7268053690592448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,128,1,float16,fp8,0,0.7464213371276855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,0,1,float16,fp8,0,2.411135991414388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,float16,0,1.3564000129699707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,64,0,1,fp8,fp8,0,2.109370708465576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,128,1,fp8,fp8,0,0.700821320215861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,float16,0,0.6489439805348715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,0,1,fp8,fp8,0,1.2148906389872234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,64,0,1,float16,fp8,0,1.3778293927510579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,128,1,float16,fp8,0,0.6557759841283163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,float16,0,1.2381333510080974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,128,1,fp8,fp8,0,0.6017760038375854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,0,1,float16,fp8,0,1.244815985361735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,float16,0,0.6525439818700155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,64,0,1,fp8,fp8,0,1.0967573324839275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,float16,0,1.2411733468373616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,128,1,float16,fp8,0,0.6584853331247965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,128,1,fp8,fp8,0,0.6053653160730997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,0,1,float16,fp8,0,1.2503573099772136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,float16,0,0.6548480192820231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,64,0,1,fp8,fp8,0,1.1004160245259602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,float16,0,1.2492160002390544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,128,1,float16,fp8,0,0.6623946825663248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,128,1,fp8,fp8,0,0.6094133456548055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,float16,0,0.4081973234812419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,float16,0,0.7320853074391683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,0,1,float16,fp8,0,1.2545866966247559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,128,1,float16,fp8,0,0.41969601313273114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,64,0,1,fp8,fp8,0,1.1072213649749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,128,1,fp8,fp8,0,0.39657068252563477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,float16,0,0.3677866856257121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,0,1,fp8,fp8,0,0.662341316541036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,64,0,1,float16,fp8,0,0.7420586744944254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,128,1,float16,fp8,0,0.37174399693806964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,float16,0,0.6706240177154541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,128,1,fp8,fp8,0,0.3450719912846883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,0,1,float16,fp8,0,0.6732693513234457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,float16,0,0.369759996732076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,64,0,1,fp8,fp8,0,0.6011466582616171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,float16,0,0.6730666955312093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,128,1,float16,fp8,0,0.37379201253255206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,128,1,fp8,fp8,0,0.34702932834625244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,float16,0,0.37166933218638104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,0,1,fp8,fp8,0,0.6042879819869995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,64,0,1,float16,fp8,0,0.677178700764974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,128,1,fp8,fp8,0,0.3512266476949056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,128,1,float16,fp8,0,0.37531201044718426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,float16,0,0.6771252950032552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,float16,0,0.2884160081545512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,0,1,fp8,fp8,0,0.6067999998728434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,64,0,1,float16,fp8,0,0.6806293328603109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,128,1,float16,fp8,0,0.29024000962575275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,float16,0,0.4606506824493408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,128,1,fp8,fp8,0,0.2706826726595561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,float16,0,0.28568534056345624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,0,1,fp8,fp8,0,0.4137493371963501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,64,0,1,float16,fp8,0,0.46053866545359295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,128,1,fp8,fp8,0,0.2711413304011027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,128,1,float16,fp8,0,0.2876799901326497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,float16,0,0.4457333485285441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,float16,0,0.28515734275182086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,0,1,fp8,fp8,0,0.406607985496521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,64,0,1,float16,fp8,0,0.4469386736551921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,128,1,float16,fp8,0,0.28600533803304035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,128,1,fp8,fp8,0,0.26941333214441937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,float16,0,0.4448426564534505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,0,1,fp8,fp8,0,0.4060320059458415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,float16,0,0.28616533676783246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,64,0,1,float16,fp8,0,0.44652799765268963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,128,1,fp8,fp8,0,0.2714453339576721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,128,1,float16,fp8,0,0.2876906593640645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,float16,0,0.4477279980977376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,0,1,fp8,fp8,0,0.40831998984018963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,64,0,1,float16,fp8,0,0.4481653372446696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,float16,0,3.4696372350056968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,128,1,fp8,fp8,0,3.160319964090983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,128,1,float16,fp8,0,3.4999891916910806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,float16,0,5.883455912272136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,float16,0,3.494394620259603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,0,1,fp8,fp8,0,5.194213231404622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,64,0,1,float16,fp8,0,5.919439951578776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,128,1,float16,fp8,0,3.5258347193400064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,float16,0,5.918752034505208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,128,1,fp8,fp8,0,3.2070185343424478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,0,1,float16,fp8,0,5.957013448079427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,float16,0,3.514095942179362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,64,0,1,fp8,fp8,0,5.237226804097493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,float16,0,5.957146962483724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,128,1,float16,fp8,0,3.5484161376953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,128,1,fp8,fp8,0,3.231583913167318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,float16,0,2.005616029103597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,0,1,float16,fp8,0,5.992074966430664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,float16,0,3.303701400756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,128,1,float16,fp8,0,2.0508693059285483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,64,0,1,fp8,fp8,0,5.26906681060791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,128,1,fp8,fp8,0,1.9091466267903645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,float16,0,1.7595787048339844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,0,1,float16,fp8,0,3.3462934494018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,64,0,1,fp8,fp8,0,2.9833545684814453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,float16,0,2.9741385777791343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,128,1,float16,fp8,0,1.7761066754659016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,128,1,fp8,fp8,0,1.6082560221354167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,float16,0,1.7699947357177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,0,1,float16,fp8,0,2.9933811823527017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,64,0,1,fp8,fp8,0,2.6308959325154624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,float16,0,2.988016128540039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,128,1,fp8,fp8,0,1.6218612988789876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,128,1,float16,fp8,0,1.7877012888590496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,float16,0,1.7803680102030437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,0,1,float16,fp8,0,3.0051040649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,64,0,1,fp8,fp8,0,2.64084259668986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,float16,0,3.0092426935831704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,128,1,fp8,fp8,0,1.635423978169759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,128,1,float16,fp8,0,1.799242655436198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,float16,0,1.0336746374766033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,0,1,float16,fp8,0,3.022416114807129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,float16,0,1.691498597462972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,64,0,1,fp8,fp8,0,2.6606666247049966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,128,1,float16,fp8,0,1.0595413049062092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,128,1,fp8,fp8,0,0.9892053604125977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,float16,0,0.9144480228424072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,0,1,float16,fp8,0,1.718506654103597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,64,0,1,fp8,fp8,0,1.5324799219767253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,float16,0,1.5282293955485027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,128,1,float16,fp8,0,0.9244853655497233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,128,1,fp8,fp8,0,0.8404959837595621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,0,1,float16,fp8,0,1.5387147267659504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,float16,0,0.9181226889292399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,64,0,1,fp8,fp8,0,1.356858730316162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,float16,0,1.5371200243632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,128,1,fp8,fp8,0,0.8466560045878092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,128,1,float16,fp8,0,0.9294293721516927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,0,1,float16,fp8,0,1.545962651570638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,float16,0,0.9243679841359457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,64,0,1,fp8,fp8,0,1.3645599683125813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,float16,0,1.5456479390462239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,128,1,float16,fp8,0,0.9342133204142252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,128,1,fp8,fp8,0,0.8534933725992838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,float16,0,0.5496533314387003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,0,1,float16,fp8,0,1.554634730021159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,64,0,1,fp8,fp8,0,1.3727466265360515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,128,1,float16,fp8,0,0.564192016919454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,float16,0,0.8848693370819092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,128,1,fp8,fp8,0,0.5309600035349528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,float16,0,0.4891253312428792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,0,1,fp8,fp8,0,0.8069600264231364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,64,0,1,float16,fp8,0,0.9019680023193359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,128,1,fp8,fp8,0,0.4556533495585124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,128,1,float16,fp8,0,0.4947253465652466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,float16,0,0.8035253683725992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,float16,0,0.49267733097076416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,0,1,fp8,fp8,0,0.7195093631744385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,64,0,1,float16,fp8,0,0.8083679676055908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,128,1,fp8,fp8,0,0.4580320119857788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,128,1,float16,fp8,0,0.498143990834554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,float16,0,0.8080693085988363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,0,1,fp8,fp8,0,0.7227839628855387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,float16,0,0.4957546790440877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,64,0,1,float16,fp8,0,0.8125867048899332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,128,1,fp8,fp8,0,0.46113598346710205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,128,1,float16,fp8,0,0.5020053386688232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,float16,0,0.8119786580403646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,float16,0,0.3104426662127177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,0,1,fp8,fp8,0,0.7273973623911539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,64,0,1,float16,fp8,0,0.8184320131937662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,float16,0,0.4845279852549235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,128,1,float16,fp8,0,0.3200693329175313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,128,1,fp8,fp8,0,0.30204800764719647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,0,1,float16,fp8,0,0.49504534403483075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,float16,0,0.2755360007286072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,64,0,1,fp8,fp8,0,0.4471999804178874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,float16,0,0.43934400876363117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,128,1,fp8,fp8,0,0.26217599709828693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,128,1,float16,fp8,0,0.27990400791168213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,0,1,fp8,fp8,0,0.40064533551534015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,64,0,1,float16,fp8,0,0.4420479933420817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,float16,0,0.2782026727994283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,float16,0,0.44193601608276367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,128,1,float16,fp8,0,0.2815413276354472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,128,1,fp8,fp8,0,0.26524267594019574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,0,1,float16,fp8,0,0.44536534945170086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,float16,0,0.28140799204508465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,64,0,1,fp8,fp8,0,0.4025386571884155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,float16,0,0.4454880158106486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,128,1,float16,fp8,0,0.2839733362197876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,128,1,fp8,fp8,0,0.26710400978724164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,0,1,float16,fp8,0,0.44757866859436035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,float16,0,0.2200053334236145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,64,0,1,fp8,fp8,0,0.40462934970855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,float16,0,0.31668800115585327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,128,1,fp8,fp8,0,0.20781866709391275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,128,1,float16,fp8,0,0.22023999691009521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,0,1,float16,fp8,0,0.31571733951568604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,64,0,1,fp8,fp8,0,0.28548266490300495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,float16,0,0.2160159945487976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,128,1,float16,fp8,0,0.21596266825993857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,float16,0,0.30610666672388714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,128,1,fp8,fp8,0,0.2057173252105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,0,1,float16,fp8,0,0.3070826729138692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,float16,0,0.21659733851750693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,64,0,1,fp8,fp8,0,0.2800053358078003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,float16,0,0.30616533756256104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,128,1,float16,fp8,0,0.21819732586542764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,128,1,fp8,fp8,0,0.205402672290802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,0,1,float16,fp8,0,0.3062453269958496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,64,0,1,fp8,fp8,0,0.2805493275324504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,float16,0,0.2177013357480367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,128,1,float16,fp8,0,0.21808532873789468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,float16,0,0.3073493242263794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,128,1,fp8,fp8,0,0.20556267102559408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,0,1,float16,fp8,0,0.3065599997838338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,64,0,1,fp8,fp8,0,0.28173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,128,1,fp8,fp8,0,4.205936113993327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,float16,0,4.623392105102539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,128,1,float16,fp8,0,4.645946820576985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,float16,0,6.503466924031575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,float16,0,4.7503306070963545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,0,1,fp8,fp8,0,5.765312194824219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,64,0,1,float16,fp8,0,6.523338953653972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,float16,0,6.635162353515625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,128,1,float16,fp8,0,4.723189353942871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,128,1,fp8,fp8,0,4.24944527943929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,0,1,float16,fp8,0,6.606042861938477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,64,0,1,fp8,fp8,0,5.813194910685222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,float16,0,4.7641706466674805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,float16,0,6.672282536824544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,128,1,float16,fp8,0,4.739770571390788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,128,1,fp8,fp8,0,4.287919998168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,float16,0,2.627247969309489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,0,1,float16,fp8,0,6.644880294799805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,float16,0,3.659893353780111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,64,0,1,fp8,fp8,0,5.8567250569661455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,128,1,float16,fp8,0,2.6791305541992188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,128,1,fp8,fp8,0,2.5024213790893555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,0,1,fp8,fp8,0,3.356682777404785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,64,0,1,float16,fp8,0,3.712373415629069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,float16,0,2.296677271525065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,float16,0,3.242010752360026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,128,1,fp8,fp8,0,2.0975093841552734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,128,1,float16,fp8,0,2.3196214040120444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,0,1,float16,fp8,0,3.2594292958577475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,float16,0,2.314623991648356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,64,0,1,fp8,fp8,0,2.881296157836914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,float16,0,3.2572692235310874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,128,1,fp8,fp8,0,2.1152000427246094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,128,1,float16,fp8,0,2.3375040690104165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,float16,0,2.3297707239786782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,0,1,float16,fp8,0,3.278208096822103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,64,0,1,fp8,fp8,0,2.901119867960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,float16,0,3.2838452657063804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,128,1,fp8,fp8,0,2.132805347442627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,128,1,float16,fp8,0,2.3527092933654785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,float16,0,1.3359467188517253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,0,1,float16,fp8,0,3.30509344736735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,float16,0,1.8575679461161296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,64,0,1,fp8,fp8,0,2.9230292638142905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,128,1,fp8,fp8,0,1.2736586729685466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,128,1,float16,fp8,0,1.3665226300557454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,0,1,float16,fp8,0,1.8890132904052734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,float16,0,1.1749013264973958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,64,0,1,fp8,fp8,0,1.7042454083760579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,float16,0,1.6482292811075847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,128,1,float16,fp8,0,1.1849439938863118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,128,1,fp8,fp8,0,1.0735893249511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,0,1,fp8,fp8,0,1.4681439399719238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,64,0,1,float16,fp8,0,1.6606987317403157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,float16,0,1.182037353515625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,float16,0,1.6589867273966472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,128,1,fp8,fp8,0,1.0825119813283284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,128,1,float16,fp8,0,1.192026694615682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,0,1,float16,fp8,0,1.67304531733195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,64,0,1,fp8,fp8,0,1.4803412755330403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,float16,0,1.189237356185913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,float16,0,1.66759459177653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,128,1,float16,fp8,0,1.201269308725993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,128,1,fp8,fp8,0,1.0925599733988445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,float16,0,0.6930987040201823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,0,1,float16,fp8,0,1.683008035024007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,float16,0,0.959007978439331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,64,0,1,fp8,fp8,0,1.4889599482218425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,128,1,float16,fp8,0,0.7104960282643636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,128,1,fp8,fp8,0,0.66484268506368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,0,1,fp8,fp8,0,0.881605307261149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,64,0,1,float16,fp8,0,0.9772373040517172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,float16,0,0.6120426654815674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,float16,0,0.8548746903737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,128,1,fp8,fp8,0,0.5641333262125651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,128,1,float16,fp8,0,0.6181386709213257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,0,1,float16,fp8,0,0.8601706822713217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,64,0,1,fp8,fp8,0,0.7664960225423177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,float16,0,0.61516801516215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,float16,0,0.8592853546142578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,128,1,float16,fp8,0,0.6219679911931356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,128,1,fp8,fp8,0,0.5686879952748617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,0,1,float16,fp8,0,0.8649760087331136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,64,0,1,fp8,fp8,0,0.77020796140035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,float16,0,0.6191093524297079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,float16,0,0.8637333710988363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,128,1,float16,fp8,0,0.6259733438491821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,128,1,fp8,fp8,0,0.5724906524022421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,0,1,float16,fp8,0,0.8711786270141602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,float16,0,0.3716213305791219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,64,0,1,fp8,fp8,0,0.7756693363189697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,float16,0,0.5100266536076864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,128,1,float16,fp8,0,0.3820426861445109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,128,1,fp8,fp8,0,0.3593759934107463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,float16,0,0.32922667264938354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,0,1,float16,fp8,0,0.5204159816106161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,64,0,1,fp8,fp8,0,0.4720799922943115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,float16,0,0.4541013240814209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,128,1,fp8,fp8,0,0.3097760081291199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,128,1,float16,fp8,0,0.33286933104197186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,0,1,float16,fp8,0,0.45717334747314453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,64,0,1,fp8,fp8,0,0.4136799971262614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,float16,0,0.33058132727940875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,float16,0,0.45710933208465576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,128,1,float16,fp8,0,0.33488531907399494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,128,1,fp8,fp8,0,0.3105226755142212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,0,1,float16,fp8,0,0.45978665351867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,float16,0,0.33345599969228107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,64,0,1,fp8,fp8,0,0.4149706761042277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,float16,0,0.4595946470896403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,128,1,float16,fp8,0,0.33854933579762775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,128,1,fp8,fp8,0,0.31251732508341473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,0,1,float16,fp8,0,0.46343998114267987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,64,0,1,fp8,fp8,0,0.4183573325475057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,float16,0,0.2143519918123881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,float16,0,0.28649065891901654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,128,1,float16,fp8,0,0.21854400634765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,128,1,fp8,fp8,0,0.20791999499003092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,0,1,float16,fp8,0,0.2917813261349996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,float16,0,0.18729599316914877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,64,0,1,fp8,fp8,0,0.2678346633911133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,float16,0,0.2526879906654358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,128,1,float16,fp8,0,0.18754667043685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,128,1,fp8,fp8,0,0.17903467019399008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,0,1,float16,fp8,0,0.2548639973004659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,64,0,1,fp8,fp8,0,0.23455999294916788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,float16,0,0.18711467583974203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,float16,0,0.25470399856567383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,128,1,float16,fp8,0,0.1896373430887858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,128,1,fp8,fp8,0,0.18096532424290976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,0,1,float16,fp8,0,0.2552799979845683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,64,0,1,fp8,fp8,0,0.23705067237218222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,float16,0,0.18930667638778687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,float16,0,0.255023996035258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,128,1,float16,fp8,0,0.19200533628463745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,128,1,fp8,fp8,0,0.1819146672884623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,0,1,float16,fp8,0,0.25920534133911133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,float16,0,0.15453333655993143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,64,0,1,fp8,fp8,0,0.23877867062886557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,128,1,float16,fp8,0,0.15457066893577576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,float16,0,0.19348265727361044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,128,1,fp8,fp8,0,0.14467199643452963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,float16,0,0.15179199973742166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,0,1,fp8,fp8,0,0.17714667320251465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,64,0,1,float16,fp8,0,0.1933493415514628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,128,1,float16,fp8,0,0.15040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,128,1,fp8,fp8,0,0.14215466380119324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,float16,0,0.18744534254074097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,0,1,float16,fp8,0,0.18673600753148398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,64,0,1,fp8,fp8,0,0.17253865798314413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,float16,0,0.1504906713962555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,128,1,float16,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,128,1,fp8,fp8,0,0.14216533303260803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,float16,0,0.18709333737691244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,float16,0,0.1497866710027059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,0,1,fp8,fp8,0,0.1728480060895284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,64,0,1,float16,fp8,0,0.18769599994023642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,float16,0,0.18667733669281006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,128,1,float16,fp8,0,0.15033599734306335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,128,1,fp8,fp8,0,0.1421119968096415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,0,1,fp8,fp8,0,0.17324266831080118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,64,0,1,float16,fp8,0,0.1872533361117045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,float16,0,3.431520144144694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,128,1,float16,fp8,0,3.4489707946777344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,float16,0,4.335562705993652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,128,1,fp8,fp8,0,3.11734930674235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,0,1,fp8,fp8,0,3.8682772318522134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,64,0,1,float16,fp8,0,4.354725201924642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,float16,0,3.4652160008748374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,float16,0,4.376837412516276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,128,1,fp8,fp8,0,3.1538880666097007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,128,1,float16,fp8,0,3.486309369405111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,0,1,float16,fp8,0,4.399242719014485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,float16,0,3.4785760243733725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,64,0,1,fp8,fp8,0,3.9077866872151694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,float16,0,4.4031680425008135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,128,1,float16,fp8,0,3.5037174224853516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,128,1,fp8,fp8,0,3.1749706268310547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,float16,0,1.9719573656717937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,0,1,float16,fp8,0,4.4348799387613935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,64,0,1,fp8,fp8,0,3.9335412979125977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,float16,0,2.4827733039855957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,128,1,float16,fp8,0,2.011626720428467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,128,1,fp8,fp8,0,1.8719414075215657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,0,1,float16,fp8,0,2.526101271311442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,float16,0,1.7224106788635254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,64,0,1,fp8,fp8,0,2.2897280057271323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,float16,0,2.177994728088379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,128,1,float16,fp8,0,1.7385759353637695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,128,1,fp8,fp8,0,1.570543924967448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,0,1,float16,fp8,0,2.193722724914551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,64,0,1,fp8,fp8,0,1.9461119969685872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,float16,0,1.7336799303690593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,float16,0,2.193376064300537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,128,1,float16,fp8,0,1.7497493426005046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,128,1,fp8,fp8,0,1.5846880276997883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,0,1,float16,fp8,0,2.2107359568277993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,64,0,1,fp8,fp8,0,1.9627413749694824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,float16,0,1.7434986432393391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,float16,0,2.20741335550944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,128,1,fp8,fp8,0,1.5970773696899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,128,1,float16,fp8,0,1.7612427075703938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,float16,0,1.004794677098592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,0,1,float16,fp8,0,2.223349412282308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,float16,0,1.2653226852416992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,64,0,1,fp8,fp8,0,1.9775892893473308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,128,1,float16,fp8,0,1.0290826956431072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,128,1,fp8,fp8,0,0.9578506946563721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,0,1,float16,fp8,0,1.289237340291341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,64,0,1,fp8,fp8,0,1.1708160241444905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,float16,0,0.8810880184173584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,float16,0,1.1125173568725586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,128,1,float16,fp8,0,0.8912533124287924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,128,1,fp8,fp8,0,0.80841064453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,0,1,float16,fp8,0,1.120794693628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,64,0,1,fp8,fp8,0,0.9985067049662272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,float16,0,0.8885813554128011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,float16,0,1.1204746564229329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,128,1,float16,fp8,0,0.8977973461151123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,128,1,fp8,fp8,0,0.8156586488087972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,0,1,float16,fp8,0,1.1293493111928303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,64,0,1,fp8,fp8,0,1.0065120061238606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,float16,0,0.8941760063171387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,float16,0,1.131482680638631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,128,1,float16,fp8,0,0.9028586546579996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,128,1,fp8,fp8,0,0.8218932946523031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,float16,0,0.5238720178604126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,0,1,float16,fp8,0,1.13811198870341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,64,0,1,fp8,fp8,0,1.0127253532409668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,float16,0,0.6578239997227987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,128,1,float16,fp8,0,0.5378666718800863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,128,1,fp8,fp8,0,0.5018986860911051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,0,1,float16,fp8,0,0.6703306833902994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,64,0,1,fp8,fp8,0,0.6112213134765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,float16,0,0.4623200098673503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,128,1,fp8,fp8,0,0.4275360107421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,128,1,float16,fp8,0,0.46568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,float16,0,0.5816906690597534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,0,1,fp8,fp8,0,0.5254079898198446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,64,0,1,float16,fp8,0,0.5841600100199381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,float16,0,0.46461331844329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,128,1,fp8,fp8,0,0.43062933286031085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,128,1,float16,fp8,0,0.46833598613739014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,float16,0,0.5833439826965332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,float16,0,0.4675840139389038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,0,1,fp8,fp8,0,0.5279786586761475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,64,0,1,float16,fp8,0,0.5876319805781046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,128,1,fp8,fp8,0,0.43485867977142334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,128,1,float16,fp8,0,0.4723786513010661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,float16,0,0.5871786673863729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,float16,0,0.28296534220377606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,0,1,fp8,fp8,0,0.5322506825129191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,64,0,1,float16,fp8,0,0.592906673749288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,float16,0,0.3527946472167969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,128,1,float16,fp8,0,0.2911199927330017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,128,1,fp8,fp8,0,0.2737226684888204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,0,1,float16,fp8,0,0.3612533410390218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,float16,0,0.24723732471466064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,64,0,1,fp8,fp8,0,0.33036800225575763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,128,1,float16,fp8,0,0.250383992989858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,128,1,fp8,fp8,0,0.23425066471099854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,float16,0,0.31028799215952557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,0,1,float16,fp8,0,0.31065066655476886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,float16,0,0.24884267648061117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,64,0,1,fp8,fp8,0,0.28565865755081177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,128,1,float16,fp8,0,0.2531999945640564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,float16,0,0.31249600648880005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,128,1,fp8,fp8,0,0.23763734102249146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,0,1,float16,fp8,0,0.31385600566864014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,64,0,1,fp8,fp8,0,0.28947200377782184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,float16,0,0.2534453272819519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,float16,0,0.3144586682319641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,128,1,float16,fp8,0,0.2550293405850728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,128,1,fp8,fp8,0,0.2400426665941874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,0,1,float16,fp8,0,0.31726932525634766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,float16,0,0.1644426683584849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,64,0,1,fp8,fp8,0,0.28966933488845825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,float16,0,0.2018773357073466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,128,1,float16,fp8,0,0.16739734013875326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,128,1,fp8,fp8,0,0.16029333074887595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,0,1,float16,fp8,0,0.2041013240814209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,64,0,1,fp8,fp8,0,0.19028266270955405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,float16,0,0.14266133308410645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,float16,0,0.17811733484268188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,128,1,float16,fp8,0,0.14288533727327982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,128,1,fp8,fp8,0,0.1320799986521403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,0,1,float16,fp8,0,0.17775466044743857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,64,0,1,fp8,fp8,0,0.1612320045630137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,float16,0,0.1421386698881785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,float16,0,0.17564266920089722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,128,1,fp8,fp8,0,0.13507733742396036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,128,1,float16,fp8,0,0.14258666833241782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,0,1,float16,fp8,0,0.17731199661890665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,64,0,1,fp8,fp8,0,0.16263467073440552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,float16,0,0.14231999715169272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,float16,0,0.17735467354456583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,128,1,float16,fp8,0,0.14563199877738953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,128,1,fp8,fp8,0,0.1381386617819468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,0,1,float16,fp8,0,0.17802667617797852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,float16,0,0.11577066779136658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,64,0,1,fp8,fp8,0,0.16500799854596457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,float16,0,0.13826666275660196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,128,1,float16,fp8,0,0.11621333161989848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,128,1,fp8,fp8,0,0.11166933178901672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,0,1,float16,fp8,0,0.13666666547457376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,64,0,1,fp8,fp8,0,0.12964266538619995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,float16,0,0.11573867003122966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,float16,0,0.13620266318321228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,128,1,float16,fp8,0,0.11546666423479716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,128,1,fp8,fp8,0,0.10966933767000835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,0,1,float16,fp8,0,0.13615467151006064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,64,0,1,fp8,fp8,0,0.12588799993197122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,float16,0,0.11547199885050456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,float16,0,0.13532267014185587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,128,1,float16,fp8,0,0.11565333604812622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,128,1,fp8,fp8,0,0.10937066872914632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,0,1,float16,fp8,0,0.13615999619166055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,64,0,1,fp8,fp8,0,0.1257866621017456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,float16,0,0.11372266213099162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,float16,0,0.13596266508102417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,128,1,float16,fp8,0,0.11361066500345866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,128,1,fp8,fp8,0,0.1097866694132487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,0,1,float16,fp8,0,0.136272003253301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,64,0,1,fp8,fp8,0,0.12522666652997336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,128,1,fp8,fp8,0,3.89412784576416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,float16,0,4.128181457519531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,128,1,float16,fp8,0,4.106170654296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,float16,0,4.665541330973308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,0,1,fp8,fp8,0,4.3508907953898115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,64,0,1,float16,fp8,0,4.677818616231282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,float16,0,4.223370552062988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,128,1,float16,fp8,0,4.194538752237956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,128,1,fp8,fp8,0,4.200021425882976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,float16,0,4.780703862508138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,0,1,float16,fp8,0,4.775631904602051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,float16,0,4.253333409627278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,64,0,1,fp8,fp8,0,4.664815902709961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,float16,0,4.831157366434733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,128,1,float16,fp8,0,4.279610633850098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,128,1,fp8,fp8,0,4.206133206685384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,float16,0,2.2920212745666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,0,1,float16,fp8,0,4.834725379943848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,64,0,1,fp8,fp8,0,4.6326398849487305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,float16,0,2.6147252718607583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,128,1,float16,fp8,0,2.2702080408732095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,128,1,fp8,fp8,0,2.248682657877604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,0,1,float16,fp8,0,2.6096693674723306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,float16,0,2.0610079765319824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,64,0,1,fp8,fp8,0,2.5037280718485513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,float16,0,2.344144026438395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,128,1,float16,fp8,0,2.0595146814982095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,128,1,fp8,fp8,0,1.9398880004882812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,0,1,float16,fp8,0,2.334544022878011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,64,0,1,fp8,fp8,0,2.1680906613667807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,float16,0,2.071434656778971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,float16,0,2.360117276509603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,128,1,float16,fp8,0,2.0687146186828613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,128,1,fp8,fp8,0,2.066864013671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,0,1,float16,fp8,0,2.3548266092936196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,64,0,1,fp8,fp8,0,2.297584056854248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,float16,0,2.0732266108194985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,float16,0,2.3703999519348145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,128,1,float16,fp8,0,2.068175951639811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,128,1,fp8,fp8,0,2.0763413111368814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,float16,0,1.1188746293385823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,0,1,float16,fp8,0,2.3642932573954263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,64,0,1,fp8,fp8,0,2.307957331339518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,128,1,float16,fp8,0,1.1049173672993977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,float16,0,1.294591983159383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,128,1,fp8,fp8,0,1.1268853346506755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,0,1,float16,fp8,0,1.2721386750539143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,float16,0,1.0394879976908367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,64,0,1,fp8,fp8,0,1.2566933631896973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,float16,0,1.1827733516693115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,128,1,float16,fp8,0,1.0351413091023762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,128,1,fp8,fp8,0,0.9580372969309489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,0,1,float16,fp8,0,1.1806613604227703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,64,0,1,fp8,fp8,0,1.0725013415018718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,float16,0,1.04366930325826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,float16,0,1.1890933513641357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,128,1,float16,fp8,0,1.0445866584777832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,128,1,fp8,fp8,0,0.9903146425882975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,0,1,float16,fp8,0,1.1876266797383626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,64,0,1,fp8,fp8,0,1.1065866947174072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,float16,0,1.0470879872639973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,float16,0,1.1910560131072998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,128,1,float16,fp8,0,1.0399733384450276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,128,1,fp8,fp8,0,0.9845120112101237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,0,1,float16,fp8,0,1.1871573130289714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,float16,0,0.5707253217697144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,64,0,1,fp8,fp8,0,1.0992746353149414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,float16,0,0.6586879889170328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,128,1,float16,fp8,0,0.5594880183537801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,128,1,fp8,fp8,0,0.5676159858703613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,0,1,float16,fp8,0,0.648746649424235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,64,0,1,fp8,fp8,0,0.6363893349965414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,float16,0,0.5301493406295776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,float16,0,0.6020479996999105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,128,1,float16,fp8,0,0.5298240184783936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,128,1,fp8,fp8,0,0.48797865708669025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,0,1,float16,fp8,0,0.6032266616821289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,64,0,1,fp8,fp8,0,0.5453866720199585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,float16,0,0.5328160127003988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,float16,0,0.605183998743693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,128,1,float16,fp8,0,0.5324639876683553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,128,1,fp8,fp8,0,0.4978773196538289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,0,1,float16,fp8,0,0.6047466595967611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,64,0,1,fp8,fp8,0,0.5559893449147543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,float16,0,0.5332106749216715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,float16,0,0.607370654741923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,128,1,float16,fp8,0,0.5298826694488525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,128,1,fp8,fp8,0,0.5004053513209025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,float16,0,0.29819732904434204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,0,1,float16,fp8,0,0.6062719821929932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,64,0,1,fp8,fp8,0,0.5599626700083414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,float16,0,0.3447573184967041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,128,1,float16,fp8,0,0.293887992699941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,128,1,fp8,fp8,0,0.29450666904449463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,0,1,fp8,fp8,0,0.3311413327852885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,64,0,1,float16,fp8,0,0.33876268068949383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,float16,0,0.2754080096880595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,float16,0,0.3123306632041931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,128,1,fp8,fp8,0,0.2545386751492818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,128,1,float16,fp8,0,0.2732479969660441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,0,1,float16,fp8,0,0.3118133346239726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,64,0,1,fp8,fp8,0,0.28551467259724933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,float16,0,0.2767893274625142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,float16,0,0.31360532840092975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,128,1,float16,fp8,0,0.27553067604700726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,128,1,fp8,fp8,0,0.26075732707977295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,0,1,float16,fp8,0,0.313429335753123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,64,0,1,fp8,fp8,0,0.28989867369333905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,float16,0,0.277946670850118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,128,1,float16,fp8,0,0.276309331258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,float16,0,0.3147306640942891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,128,1,fp8,fp8,0,0.26197866598765057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,0,1,fp8,fp8,0,0.29156800111134845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,64,0,1,float16,fp8,0,0.31547733147939044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,float16,0,0.1600320041179657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,128,1,float16,fp8,0,0.1586026648680369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,float16,0,0.18464533487955728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,128,1,fp8,fp8,0,0.1588213344415029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,0,1,float16,fp8,0,0.18125333388646445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,64,0,1,fp8,fp8,0,0.17693867286046347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,float16,0,0.14591466387112936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,128,1,float16,fp8,0,0.14422399799029031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,float16,0,0.1649386684099833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,128,1,fp8,fp8,0,0.1367733379205068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,0,1,fp8,fp8,0,0.1532693306605021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,64,0,1,float16,fp8,0,0.16478932897249857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,float16,0,0.1442506710688273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,128,1,float16,fp8,0,0.14587733149528503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,float16,0,0.16531200210253397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,128,1,fp8,fp8,0,0.1381386617819468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,0,1,fp8,fp8,0,0.15493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,64,0,1,float16,fp8,0,0.16597333550453186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,float16,0,0.14520532886187235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,128,1,float16,fp8,0,0.14667200048764548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,float16,0,0.1669493317604065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,128,1,fp8,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,float16,0,0.09294933080673218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,0,1,float16,fp8,0,0.16696532567342123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,64,0,1,fp8,fp8,0,0.1569706698258718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,128,1,float16,fp8,0,0.09001599748929341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,128,1,fp8,fp8,0,0.09187199672063191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,float16,0,0.10579733053843181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,0,1,float16,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,64,0,1,fp8,fp8,0,0.10201600193977356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,float16,0,0.08138133088747661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,float16,0,0.09271466732025146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,128,1,float16,fp8,0,0.08141866823037465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,128,1,fp8,fp8,0,0.07484266658624013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,0,1,float16,fp8,0,0.09433066844940186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,64,0,1,fp8,fp8,0,0.08559999863306682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,float16,0,0.08226666847864787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,float16,0,0.09328533212343852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,128,1,float16,fp8,0,0.08185066779454549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,128,1,fp8,fp8,0,0.07679466903209686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,0,1,float16,fp8,0,0.09282666444778442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,64,0,1,fp8,fp8,0,0.08708266417185466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,float16,0,0.08218666911125183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,float16,0,0.09304533402125041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,128,1,float16,fp8,0,0.0825386643409729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,128,1,fp8,fp8,0,0.0771679977575938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,0,1,float16,fp8,0,0.093231995900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,float16,0,0.051167999704678856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,64,0,1,fp8,fp8,0,0.08585600058237712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,float16,0,0.057914664347966514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,128,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,128,1,float16,fp8,0,0.05073600014050802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,0,1,float16,fp8,0,0.05930666625499725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,64,0,1,fp8,fp8,0,0.05634133517742157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,float16,0,0.050144001841545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,float16,0,0.05691733459631602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,128,1,float16,fp8,0,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,128,1,fp8,fp8,0,0.04741866886615753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,0,1,float16,fp8,0,0.055829331278800964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,64,0,1,fp8,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,float16,0,0.049423997600873314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,float16,0,0.05602133274078369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,128,1,float16,fp8,0,0.049642667174339294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,128,1,fp8,fp8,0,0.04696000119050344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,0,1,float16,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,float16,0,0.04952000081539154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,64,0,1,fp8,fp8,0,0.052416001756985985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,128,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,float16,0,0.05840000013510386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,0,1,float16,fp8,0,0.05658666789531708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,128,1,fp8,fp8,0,0.047040000557899475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,64,0,1,fp8,fp8,0,0.05402666827042898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,128,1,fp8,fp8,0,3.798394521077474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,128,1,float16,float16,0,3.9898719787597656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,0,1,float16,float16,0,4.000085194905599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,128,1,float16,fp8,0,3.9876159032185874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,0,1,fp8,fp8,0,3.7590611775716147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,64,0,1,float16,fp8,0,3.9954986572265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,128,1,float16,float16,0,4.033424059549968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,0,1,float16,float16,0,4.071930567423503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,128,1,float16,fp8,0,4.02291202545166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,128,1,fp8,fp8,0,4.093434651692708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,0,1,float16,fp8,0,4.041589419047038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,64,0,1,fp8,fp8,0,4.060912132263184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,128,1,float16,float16,0,4.121408144632976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,0,1,float16,float16,0,4.14136536916097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,128,1,fp8,fp8,0,4.089621225992839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,128,1,float16,fp8,0,4.147568066914876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,128,1,float16,float16,0,2.2423413594563804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,0,1,float16,fp8,0,4.1365706125895185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,64,0,1,fp8,fp8,0,4.044426600138347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,0,1,float16,float16,0,2.2568960189819336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,128,1,float16,fp8,0,2.203978697458903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,128,1,fp8,fp8,0,2.18941338857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,0,1,float16,fp8,0,2.265658696492513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,128,1,float16,float16,0,2.0048319498697915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,64,0,1,fp8,fp8,0,2.1903252601623535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,128,1,float16,fp8,0,2.00435733795166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,0,1,float16,float16,0,2.003887971242269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,128,1,fp8,fp8,0,1.881600062052409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,0,1,float16,fp8,0,2.0022133191426597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,64,0,1,fp8,fp8,0,1.8773013750712078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,128,1,float16,float16,0,2.0138986905415854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,0,1,float16,float16,0,2.014352003733317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,128,1,float16,fp8,0,2.0067520141601562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,128,1,fp8,fp8,0,2.0249759356180825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,0,1,float16,fp8,0,2.016064008076986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,64,0,1,fp8,fp8,0,1.999941349029541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,128,1,float16,float16,0,2.0077013969421387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,0,1,float16,float16,0,2.024400075276693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,128,1,float16,fp8,0,2.006869316101074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,128,1,fp8,fp8,0,2.020960013071696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,0,1,float16,fp8,0,2.0097440083821616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,128,1,float16,float16,0,1.0924746990203857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,64,0,1,fp8,fp8,0,2.0052159627278647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,0,1,float16,float16,0,1.1071946620941162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,128,1,float16,fp8,0,1.0800000031789143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,128,1,fp8,fp8,0,1.0976800123850505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,0,1,float16,fp8,0,1.0982720057169597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,64,0,1,fp8,fp8,0,1.0901866753896077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,128,1,float16,float16,0,1.0124213695526123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,0,1,float16,float16,0,1.0108746687571208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,128,1,float16,fp8,0,1.0084959665934246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,128,1,fp8,fp8,0,0.9341440200805664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,0,1,float16,fp8,0,1.0081173578898113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,64,0,1,fp8,fp8,0,0.9263520240783691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,128,1,float16,float16,0,1.0157066980997722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,0,1,float16,float16,0,1.0174026489257812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,128,1,float16,fp8,0,1.0115946928660076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,128,1,fp8,fp8,0,0.9907200336456299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,0,1,float16,fp8,0,1.0164053440093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,64,0,1,fp8,fp8,0,0.9777493476867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,128,1,float16,float16,0,1.0165706475575764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,0,1,float16,float16,0,1.0186773141225178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,128,1,float16,fp8,0,1.0112000306447346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,128,1,fp8,fp8,0,0.9741386572519938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,0,1,float16,fp8,0,1.0153813362121582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,128,1,float16,float16,0,0.5547200043996176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,64,0,1,fp8,fp8,0,0.9639626344045004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,0,1,float16,float16,0,0.5634346803029379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,128,1,float16,fp8,0,0.5458986759185791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,128,1,fp8,fp8,0,0.5521386861801147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,0,1,float16,fp8,0,0.5538080135981241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,64,0,1,fp8,fp8,0,0.5549706617991129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,128,1,float16,float16,0,0.516053318977356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,0,1,float16,float16,0,0.5166186491648356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,128,1,fp8,fp8,0,0.47572267055511475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,128,1,float16,fp8,0,0.5130826632181803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,0,1,float16,fp8,0,0.5140800078709921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,64,0,1,fp8,fp8,0,0.47113064924875897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,128,1,float16,float16,0,0.51747198899587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,0,1,float16,float16,0,0.5183146794637045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,128,1,float16,fp8,0,0.5153599977493286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,128,1,fp8,fp8,0,0.4842720031738281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,0,1,fp8,fp8,0,0.48015467325846356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,64,0,1,float16,fp8,0,0.5174933274586996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,128,1,float16,float16,0,0.5177760124206543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,0,1,float16,float16,0,0.5177973508834839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,128,1,fp8,fp8,0,0.48869868119557697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,128,1,float16,fp8,0,0.514901320139567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,0,1,float16,fp8,0,0.5159680048624674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,64,0,1,fp8,fp8,0,0.4821386734644572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,128,1,float16,float16,0,0.29200534025828045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,0,1,float16,float16,0,0.29549866914749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,128,1,float16,fp8,0,0.28428266445795697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,128,1,fp8,fp8,0,0.2888000011444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,0,1,float16,fp8,0,0.29072533051172894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,64,0,1,fp8,fp8,0,0.28942932685216266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,128,1,float16,float16,0,0.26648000876108807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,0,1,float16,float16,0,0.2688960035641988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,128,1,float16,fp8,0,0.267850657304128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,128,1,fp8,fp8,0,0.24942932526270548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,0,1,float16,fp8,0,0.266704003016154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,64,0,1,fp8,fp8,0,0.2461013396581014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,128,1,float16,float16,0,0.26756266752878827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,0,1,float16,float16,0,0.2688000003496806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,128,1,float16,fp8,0,0.2673119902610779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,128,1,fp8,fp8,0,0.25262399514516193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,0,1,float16,fp8,0,0.2680266698201497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,64,0,1,fp8,fp8,0,0.25114667415618896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,128,1,float16,float16,0,0.2688800096511841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,0,1,float16,float16,0,0.2708746592203776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,128,1,float16,fp8,0,0.267359991868337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,128,1,fp8,fp8,0,0.2542773286501567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,128,1,float16,float16,0,0.15628266334533691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,0,1,fp8,fp8,0,0.2539466619491577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,64,0,1,float16,fp8,0,0.2688586711883545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,0,1,float16,float16,0,0.15879467129707336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,128,1,float16,fp8,0,0.15315199891726175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,128,1,fp8,fp8,0,0.15547200043996176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,0,1,float16,fp8,0,0.15613866845766702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,64,0,1,fp8,fp8,0,0.1551040013631185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,128,1,float16,float16,0,0.14205867052078247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,0,1,float16,float16,0,0.14169599612553915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,128,1,float16,fp8,0,0.14196266730626425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,128,1,fp8,fp8,0,0.13290133078893027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,0,1,float16,fp8,0,0.14115200440088907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,64,0,1,fp8,fp8,0,0.13100799918174744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,128,1,float16,float16,0,0.14139200250307718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,0,1,float16,float16,0,0.1406880021095276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,128,1,fp8,fp8,0,0.13412800431251526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,128,1,float16,fp8,0,0.14071466525395712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,0,1,float16,fp8,0,0.1409386694431305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,64,0,1,fp8,fp8,0,0.13368533054987589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,128,1,float16,float16,0,0.1420693298180898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,0,1,float16,float16,0,0.14200533429781595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,128,1,float16,fp8,0,0.14257066448529562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,128,1,fp8,fp8,0,0.1376426617304484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,0,1,float16,fp8,0,0.14402666687965393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,128,1,float16,float16,0,0.08828266461690266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,0,1,float16,float16,0,0.09014933307965596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,64,0,1,fp8,fp8,0,0.13606933752695718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,128,1,float16,fp8,0,0.08694400389989217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,0,1,float16,fp8,0,0.08866666754086812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,128,1,fp8,fp8,0,0.09038399656613667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,64,0,1,fp8,fp8,0,0.08985066413879395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,128,1,float16,float16,0,0.08059733112653096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,0,1,float16,float16,0,0.07879999776681264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,128,1,float16,fp8,0,0.07934933404127757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,128,1,fp8,fp8,0,0.07418666779994965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,0,1,float16,fp8,0,0.08040000001589458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,64,0,1,fp8,fp8,0,0.07358933488527934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,128,1,float16,float16,0,0.07955199976762135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,0,1,float16,float16,0,0.08082666496435802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,128,1,float16,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,128,1,fp8,fp8,0,0.07521066566308339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,0,1,float16,fp8,0,0.08020799855391185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,64,0,1,fp8,fp8,0,0.07437333464622498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,128,1,float16,float16,0,0.08084799846013387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,0,1,float16,float16,0,0.0803413341442744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,128,1,float16,fp8,0,0.08036266764005025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,128,1,fp8,fp8,0,0.07446933289368947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,0,1,float16,fp8,0,0.07965333263079326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,128,1,float16,float16,0,0.049738665421803795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,64,0,1,fp8,fp8,0,0.07442666590213776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,0,1,float16,float16,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,128,1,float16,fp8,0,0.0503359983364741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,128,1,fp8,fp8,0,0.050293331344922386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,0,1,float16,fp8,0,0.050154666105906166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,128,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,64,0,1,fp8,fp8,0,0.04935466746489207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,128,1,float16,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,128,1,fp8,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,0,1,float16,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,64,0,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,128,1,float16,float16,0,0.04972266654173533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,0,1,float16,float16,0,0.049685334165891014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,128,1,float16,fp8,0,0.04922133187452952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,128,1,fp8,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,0,1,float16,fp8,0,0.049839998284975685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,64,0,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,128,1,float16,float16,0,0.04969066878159841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,0,1,float16,float16,0,0.04979733129342397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,128,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,128,1,fp8,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,0,1,float16,fp8,0,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,64,0,1,fp8,fp8,0,0.045935998360315956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,128,1,float16,float16,0,0.033530667424201965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,0,1,float16,float16,0,0.03377600014209747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,128,1,float16,fp8,0,0.0352906659245491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,128,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,0,1,float16,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,64,0,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,128,1,float16,float16,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,0,1,float16,float16,0,0.03375466664632162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,128,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,128,1,fp8,fp8,0,0.03190399954716364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,0,1,float16,fp8,0,0.03333866596221924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,64,0,1,fp8,fp8,0,0.03067733347415924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,128,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,0,1,float16,float16,0,0.032960000137488045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,128,1,float16,fp8,0,0.032885332902272545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,128,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,64,0,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,128,1,float16,float16,0,0.03332266708215078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,0,1,float16,float16,0,0.03292266776164373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,128,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,0,1,float16,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,0,1,fp8,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,64,128,1,float16,fp8,0,0.034389334420363106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,0,1,float16,float16,0,1.8422986666361492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,128,1,float16,float16,0,1.8777173360188801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,128,1,float16,fp8,0,1.8727199236551921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,128,1,fp8,fp8,0,1.7530825932820637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,0,1,fp8,fp8,0,1.6758559544881184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,64,0,1,float16,fp8,0,1.830992062886556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,128,1,float16,float16,0,1.8706773122151692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,0,1,float16,float16,0,1.8323787053426106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,128,1,float16,fp8,0,1.867087999979655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,128,1,fp8,fp8,0,1.8611680666605632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,0,1,float16,fp8,0,1.825386683146159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,64,0,1,fp8,fp8,0,1.7956533432006836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,128,1,float16,float16,0,1.8863040606180828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,0,1,float16,float16,0,1.8626933097839355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,128,1,float16,fp8,0,1.872869332631429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,128,1,fp8,fp8,0,1.87881072362264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,0,1,float16,fp8,0,1.8434079488118489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,128,1,float16,float16,0,1.0228640238444011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,0,1,float16,float16,0,1.0045440196990967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,64,0,1,fp8,fp8,0,1.8032639821370442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,128,1,float16,fp8,0,1.0083893140157063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,128,1,fp8,fp8,0,1.0253492991129558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,0,1,float16,fp8,0,1.0013759930928547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,64,0,1,fp8,fp8,0,0.9907253583272299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,128,1,float16,float16,0,0.946943998336792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,0,1,float16,float16,0,0.9248426755269369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,128,1,float16,fp8,0,0.9438772996266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,128,1,fp8,fp8,0,0.8674506346384684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,0,1,float16,fp8,0,0.9259359836578369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,64,0,1,fp8,fp8,0,0.8352853457132975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,128,1,float16,float16,0,0.9431359767913818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,0,1,float16,float16,0,0.9237226645151774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,128,1,float16,fp8,0,0.9436319669087728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,128,1,fp8,fp8,0,0.9016853173573812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,0,1,float16,fp8,0,0.921295960744222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,64,0,1,fp8,fp8,0,0.8773653507232666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,128,1,float16,float16,0,0.9473600387573242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,0,1,float16,float16,0,0.9254346688588461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,128,1,float16,fp8,0,0.9433813095092773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,128,1,fp8,fp8,0,0.9086986382802328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,0,1,fp8,fp8,0,0.8742240269978842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,64,0,1,float16,fp8,0,0.926911989847819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,128,1,float16,float16,0,0.5226773420969645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,0,1,float16,float16,0,0.5095093250274658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,128,1,float16,fp8,0,0.512117346127828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,128,1,fp8,fp8,0,0.5234453280766805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,0,1,fp8,fp8,0,0.5044106642405192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,64,0,1,float16,fp8,0,0.5039413372675577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,128,1,float16,float16,0,0.4816693464914958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,0,1,float16,float16,0,0.4713493188222249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,128,1,fp8,fp8,0,0.44301335016886395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,128,1,float16,fp8,0,0.48028266429901123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,0,1,float16,fp8,0,0.46911998589833576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,64,0,1,fp8,fp8,0,0.4259893496831258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,128,1,float16,float16,0,0.4813653230667114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,0,1,float16,float16,0,0.47098668416341144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,128,1,fp8,fp8,0,0.4535679817199707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,128,1,float16,fp8,0,0.4800906578699748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,0,1,fp8,fp8,0,0.4345066547393799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,64,0,1,float16,fp8,0,0.47006932894388836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,0,1,float16,float16,0,0.47099733352661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,128,1,float16,float16,0,0.48261864980061847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,128,1,float16,fp8,0,0.48106666405995685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,128,1,fp8,fp8,0,0.454858660697937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,128,1,float16,float16,0,0.2728533347447713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,0,1,float16,float16,0,0.2704266707102458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,0,1,fp8,fp8,0,0.43989332516988117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,64,0,1,float16,fp8,0,0.4702933231989543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,128,1,float16,fp8,0,0.2672266761461894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,128,1,fp8,fp8,0,0.2743946711222331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,0,1,float16,fp8,0,0.26320000489552814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,64,0,1,fp8,fp8,0,0.2640533248583476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,128,1,float16,float16,0,0.25030932823816937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,0,1,float16,float16,0,0.24641066789627075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,128,1,fp8,fp8,0,0.23315733671188354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,128,1,float16,fp8,0,0.25145600239435834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,0,1,float16,fp8,0,0.24594134092330933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,64,0,1,fp8,fp8,0,0.2227733333905538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,0,1,float16,float16,0,0.24528533220291138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,128,1,float16,float16,0,0.25127466519673664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,128,1,float16,fp8,0,0.250709335009257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,128,1,fp8,fp8,0,0.2358293334643046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,0,1,fp8,fp8,0,0.2272746761639913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,64,0,1,float16,fp8,0,0.24519999821980795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,128,1,float16,float16,0,0.25199999411900836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,0,1,float16,float16,0,0.24611733357111612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,128,1,fp8,fp8,0,0.23774933815002441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,128,1,float16,fp8,0,0.2520800034205119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,0,1,float16,fp8,0,0.2461493412653605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,64,0,1,fp8,fp8,0,0.2290346622467041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,128,1,float16,float16,0,0.1478613317012787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,0,1,float16,float16,0,0.1434933344523112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,128,1,float16,fp8,0,0.1444960037867228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,128,1,fp8,fp8,0,0.1483680009841919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,0,1,float16,fp8,0,0.14239466190338135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,64,0,1,fp8,fp8,0,0.14274666706720987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,128,1,float16,float16,0,0.1344266633192698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,0,1,float16,float16,0,0.1325279970963796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,128,1,float16,fp8,0,0.1348426640033722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,128,1,fp8,fp8,0,0.12660800417264303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,0,1,float16,fp8,0,0.13158399860064188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,64,0,1,fp8,fp8,0,0.12184533476829529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,128,1,float16,float16,0,0.13456533352533975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,0,1,float16,float16,0,0.13130133350690207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,128,1,float16,fp8,0,0.13471466302871704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,128,1,fp8,fp8,0,0.12786133090655008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,0,1,float16,fp8,0,0.13056533535321554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,64,0,1,fp8,fp8,0,0.12221333384513855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,128,1,float16,float16,0,0.1344213287035624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,0,1,float16,float16,0,0.13191999991734824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,128,1,float16,fp8,0,0.13453867038091025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,128,1,fp8,fp8,0,0.12838932871818542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,0,1,float16,fp8,0,0.13191466530164084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,64,0,1,fp8,fp8,0,0.12354666988054912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,128,1,float16,float16,0,0.08369599779446919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,0,1,float16,float16,0,0.08243200182914734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,128,1,float16,fp8,0,0.08301866551240285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,128,1,fp8,fp8,0,0.08648533622423808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,0,1,float16,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,64,0,1,fp8,fp8,0,0.08494933446248372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,128,1,float16,float16,0,0.07674133280913036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,0,1,float16,float16,0,0.0753653347492218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,128,1,float16,fp8,0,0.07758933305740356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,128,1,fp8,fp8,0,0.07161599894364674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,0,1,float16,fp8,0,0.07472000022729237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,64,0,1,fp8,fp8,0,0.0695360004901886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,128,1,float16,float16,0,0.07746666669845581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,0,1,float16,float16,0,0.07448533177375793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,128,1,float16,fp8,0,0.07796800136566162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,128,1,fp8,fp8,0,0.07263466715812683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,0,1,float16,fp8,0,0.07585066556930542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,64,0,1,fp8,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,128,1,float16,float16,0,0.07648533085982005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,0,1,float16,float16,0,0.07648000121116638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,128,1,float16,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,128,1,fp8,fp8,0,0.07160000006357829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,0,1,float16,fp8,0,0.07618666688601176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,64,0,1,fp8,fp8,0,0.06923200190067291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,128,1,float16,float16,0,0.047584002216657005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,0,1,float16,float16,0,0.046495998899141945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,128,1,float16,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,0,1,float16,fp8,0,0.0460746685663859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,128,1,fp8,fp8,0,0.04818133513132731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,64,0,1,fp8,fp8,0,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,128,1,float16,float16,0,0.04577066500981649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,0,1,float16,float16,0,0.04354133208592733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,128,1,float16,fp8,0,0.04623466730117798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,128,1,fp8,fp8,0,0.04381866753101349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,128,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,0,1,float16,float16,0,0.04365866879622141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,128,1,float16,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,128,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,64,0,1,fp8,fp8,0,0.04246933261553446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,128,1,float16,float16,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,0,1,float16,float16,0,0.0444213350613912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,128,1,float16,fp8,0,0.04621333380540212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,128,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,0,1,float16,fp8,0,0.04517333209514618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,64,0,1,fp8,fp8,0,0.043621331453323364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,128,1,float16,float16,0,0.033520000676314034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,0,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,128,1,float16,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,0,1,float16,fp8,0,0.03130666663249334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,64,0,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,128,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,0,1,float16,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,128,1,fp8,fp8,0,0.031008000175158184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,64,0,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,0,1,float16,float16,0,0.031258667508761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,128,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,128,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,128,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,64,0,1,fp8,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,128,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,128,1,float16,fp8,0,0.03201066702604294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,128,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,64,0,1,fp8,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,128,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,0,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,128,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,64,0,1,fp8,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,128,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,128,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,64,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,128,1,float16,float16,0,0.022986667851607006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,0,1,float16,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,64,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,128,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,0,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,64,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,128,1,float16,float16,0,0.9728000164031982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,128,1,float16,fp8,0,0.9727733135223389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,0,1,float16,float16,0,0.9738667011260986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,128,1,fp8,fp8,0,0.9086666901906332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,0,1,float16,fp8,0,0.9712159633636475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,64,0,1,fp8,fp8,0,0.9119040171305338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,128,1,float16,float16,0,0.9743039608001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,0,1,float16,float16,0,0.9758719603220621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,128,1,float16,fp8,0,0.9708159764607748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,128,1,fp8,fp8,0,0.9793279965718588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,0,1,float16,fp8,0,0.9710293610890707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,64,0,1,fp8,fp8,0,0.9872213204701742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,0,1,float16,float16,0,0.9804373582204183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,128,1,float16,float16,0,0.9782293637593588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,128,1,float16,fp8,0,0.9768746693929037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,128,1,float16,float16,0,0.5400586525599161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,128,1,fp8,fp8,0,0.9879519939422607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,0,1,float16,fp8,0,0.9755040009816488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,64,0,1,fp8,fp8,0,0.9849119981129965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,0,1,float16,float16,0,0.5405653317769369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,128,1,float16,fp8,0,0.5299413204193115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,128,1,fp8,fp8,0,0.5388160149256388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,0,1,float16,fp8,0,0.5304586489995321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,128,1,float16,float16,0,0.49566932519276935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,64,0,1,fp8,fp8,0,0.5397973457972208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,0,1,float16,float16,0,0.4933120012283325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,128,1,float16,fp8,0,0.4936053355534871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,128,1,fp8,fp8,0,0.4623839855194092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,0,1,fp8,fp8,0,0.46112000942230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,64,0,1,float16,fp8,0,0.4939039945602417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,128,1,float16,float16,0,0.49530665079752606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,0,1,float16,float16,0,0.4954880078633626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,128,1,float16,fp8,0,0.493397315343221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,128,1,fp8,fp8,0,0.4780106544494629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,0,1,float16,fp8,0,0.49350933233896893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,64,0,1,fp8,fp8,0,0.47667733828226727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,128,1,float16,float16,0,0.49829332033793133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,0,1,float16,float16,0,0.4984000126520793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,128,1,float16,fp8,0,0.49555734793345135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,128,1,fp8,fp8,0,0.4800906578699748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,0,1,float16,fp8,0,0.49428800741831463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,64,0,1,fp8,fp8,0,0.4790346622467041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,128,1,float16,float16,0,0.27949867645899457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,0,1,float16,float16,0,0.2805866599082947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,128,1,float16,fp8,0,0.2744266589482625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,128,1,fp8,fp8,0,0.28038400411605835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,0,1,float16,fp8,0,0.2751413385073344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,64,0,1,fp8,fp8,0,0.2797546585400899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,128,1,float16,float16,0,0.2571306626001994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,0,1,float16,float16,0,0.2561546762784322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,128,1,float16,fp8,0,0.25681066513061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,128,1,fp8,fp8,0,0.24018667141596475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,0,1,float16,fp8,0,0.2572159965833028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,64,0,1,fp8,fp8,0,0.24065599838892618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,128,1,float16,float16,0,0.25601067145665485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,0,1,float16,float16,0,0.25628799200057983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,128,1,float16,fp8,0,0.25704000393549603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,128,1,fp8,fp8,0,0.2462773323059082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,0,1,float16,fp8,0,0.2549813389778137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,64,0,1,fp8,fp8,0,0.24637866020202637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,128,1,float16,float16,0,0.25830399990081787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,0,1,float16,float16,0,0.2579626639684041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,128,1,float16,fp8,0,0.25753066937128705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,128,1,fp8,fp8,0,0.2488373319307963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,0,1,float16,fp8,0,0.25737067063649494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,64,0,1,fp8,fp8,0,0.2488480011622111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,128,1,float16,float16,0,0.14890133341153464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,0,1,float16,float16,0,0.1491146683692932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,128,1,float16,fp8,0,0.14663466811180115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,128,1,fp8,fp8,0,0.15040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,0,1,float16,fp8,0,0.14711466431617737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,64,0,1,fp8,fp8,0,0.15040533741315207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,128,1,float16,float16,0,0.13724266489346823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,0,1,float16,float16,0,0.13635733723640442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,128,1,float16,fp8,0,0.13634666800498962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,128,1,fp8,fp8,0,0.12959999839464822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,0,1,float16,fp8,0,0.13741866747538248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,64,0,1,fp8,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,128,1,float16,float16,0,0.13622933626174927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,0,1,float16,float16,0,0.13645333051681519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,128,1,float16,fp8,0,0.13594133655230203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,128,1,fp8,fp8,0,0.13153066237767538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,0,1,float16,fp8,0,0.13614933689435324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,64,0,1,fp8,fp8,0,0.1300266683101654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,128,1,float16,float16,0,0.13665599624315897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,0,1,float16,float16,0,0.13796266913414001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,128,1,float16,fp8,0,0.13593600193659464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,128,1,fp8,fp8,0,0.13201600313186646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,128,1,float16,float16,0,0.08514133095741272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,0,1,float16,fp8,0,0.13652799526850382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,64,0,1,fp8,fp8,0,0.13198933005332947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,0,1,float16,float16,0,0.08457066615422566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,128,1,float16,fp8,0,0.08494399984677632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,128,1,fp8,fp8,0,0.08803199728329976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,0,1,float16,fp8,0,0.08333866794904073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,64,0,1,fp8,fp8,0,0.08785600463549297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,128,1,float16,float16,0,0.07734933495521545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,0,1,float16,float16,0,0.07775466640790303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,128,1,float16,fp8,0,0.07784000039100647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,128,1,fp8,fp8,0,0.07254933317502339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,0,1,float16,fp8,0,0.07811200122038524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,64,0,1,fp8,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,128,1,float16,float16,0,0.0783786674340566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,0,1,float16,float16,0,0.07682666679223378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,128,1,float16,fp8,0,0.076773335536321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,128,1,fp8,fp8,0,0.07273066540559132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,0,1,float16,fp8,0,0.07704000174999237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,64,0,1,fp8,fp8,0,0.07425599793593089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,128,1,float16,float16,0,0.07663466533025105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,0,1,float16,float16,0,0.077674667040507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,128,1,float16,fp8,0,0.0778186668952306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,128,1,fp8,fp8,0,0.07414400080839793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,0,1,float16,fp8,0,0.07842133442560832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,64,0,1,fp8,fp8,0,0.07400000095367432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,128,1,float16,float16,0,0.04770133395989736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,0,1,float16,float16,0,0.048623998959859215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,128,1,float16,fp8,0,0.04911999901135763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,128,1,fp8,fp8,0,0.048021331429481506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,0,1,float16,fp8,0,0.04931733508904775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,64,0,1,fp8,fp8,0,0.048063998421033226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,128,1,float16,float16,0,0.04569066564242045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,0,1,float16,float16,0,0.04609066744645437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,128,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,128,1,fp8,fp8,0,0.04353600243727366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,0,1,float16,fp8,0,0.04623466730117798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,64,0,1,fp8,fp8,0,0.04544533292452494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,128,1,float16,float16,0,0.04584533472855886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,0,1,float16,float16,0,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,128,1,float16,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,128,1,fp8,fp8,0,0.044810667634010315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,0,1,float16,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,64,0,1,fp8,fp8,0,0.04418666660785675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,128,1,float16,float16,0,0.04655999938646952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,128,1,float16,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,0,1,float16,float16,0,0.047456001242001854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,128,1,fp8,fp8,0,0.04573333263397217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,0,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,64,0,1,fp8,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,128,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,0,1,float16,float16,0,0.03012799968322118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,128,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,128,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,64,0,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,128,1,float16,float16,0,0.02920000006755193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,0,1,float16,float16,0,0.02985599885384242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,128,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,0,1,float16,fp8,0,0.029861333469549816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,64,0,1,fp8,fp8,0,0.027903998891512554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,128,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,128,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,128,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,0,1,float16,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,64,0,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,128,1,float16,float16,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,128,1,float16,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,128,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,0,1,fp8,fp8,0,0.02792000025510788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,64,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,128,1,float16,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,128,1,fp8,fp8,0,0.02295999974012375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,128,1,float16,float16,0,0.022863999009132385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,64,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,128,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,0,1,float16,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,64,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,128,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,64,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,128,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,128,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,64,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,128,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,128,1,fp8,fp8,0,0.018746666610240936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,64,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,128,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,128,1,float16,fp8,0,0.01859733338157336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,128,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,128,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,128,1,float16,float16,0,0.6635520060857137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,0,1,float16,float16,0,0.6644533475240072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,128,1,float16,fp8,0,0.6632053454717001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,128,1,fp8,fp8,0,0.6134933233261108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,0,1,float16,fp8,0,0.6612373193105062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,64,0,1,fp8,fp8,0,0.6150399843851725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,128,1,float16,float16,0,0.6616266568501791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,0,1,float16,float16,0,0.6631573438644409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,128,1,float16,fp8,0,0.6607893308003744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,128,1,fp8,fp8,0,0.6245919863382975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,0,1,fp8,fp8,0,0.6268320083618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,64,0,1,float16,fp8,0,0.6592479944229126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,128,1,float16,float16,0,0.6609119971593221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,128,1,float16,fp8,0,0.6615306536356608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,0,1,float16,float16,0,0.6642293135325114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,128,1,fp8,fp8,0,0.6252319812774658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,0,1,fp8,fp8,0,0.6251893440882365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,64,0,1,float16,fp8,0,0.6649386485417684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,0,1,float16,float16,0,0.36210131645202637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,128,1,float16,float16,0,0.36133865515391034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,128,1,float16,fp8,0,0.3577440182367961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,0,1,float16,fp8,0,0.3561813433965047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,128,1,float16,float16,0,0.33980798721313477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,0,1,fp8,fp8,0,0.35371200243632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,64,128,1,fp8,fp8,0,0.3559253215789795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,0,1,float16,float16,0,0.33901333808898926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,128,1,float16,fp8,0,0.3405333360036214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,128,1,fp8,fp8,0,0.31550399462382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,0,1,float16,fp8,0,0.3415199915568034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,64,0,1,fp8,fp8,0,0.3146666685740153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,128,1,float16,float16,0,0.33955200513203937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,0,1,float16,float16,0,0.3391733169555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,128,1,float16,fp8,0,0.3380800088246663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,128,1,fp8,fp8,0,0.31940799951553345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,0,1,float16,fp8,0,0.33805867036183673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,64,0,1,fp8,fp8,0,0.31863999366760254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,128,1,float16,float16,0,0.34058666229248047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,0,1,float16,float16,0,0.3406826655069987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,128,1,float16,fp8,0,0.3404159943262736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,128,1,fp8,fp8,0,0.32129067182540894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,0,1,float16,fp8,0,0.3388693332672119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,128,1,float16,float16,0,0.19082667430241904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,0,1,float16,float16,0,0.19031999508539835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,128,1,float16,fp8,0,0.18762133518854776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,64,0,1,fp8,fp8,0,0.32068800926208496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,128,1,fp8,fp8,0,0.18724799156188965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,0,1,float16,fp8,0,0.1893813411394755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,64,0,1,fp8,fp8,0,0.18779200315475464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,128,1,float16,float16,0,0.17773866653442383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,0,1,float16,float16,0,0.1787839929262797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,128,1,float16,fp8,0,0.17729065815607706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,128,1,fp8,fp8,0,0.1667520006497701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,0,1,float16,fp8,0,0.17842666308085123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,64,0,1,fp8,fp8,0,0.16472533345222473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,128,1,float16,float16,0,0.17730132738749185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,0,1,float16,float16,0,0.17775466044743857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,128,1,float16,fp8,0,0.17749865849812826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,128,1,fp8,fp8,0,0.1670773426691691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,0,1,float16,fp8,0,0.1774666706720988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,64,0,1,fp8,fp8,0,0.16702399651209512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,128,1,float16,float16,0,0.17891200383504233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,0,1,float16,float16,0,0.1776533325513204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,128,1,float16,fp8,0,0.1779306729634603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,128,1,fp8,fp8,0,0.16970133781433105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,0,1,float16,fp8,0,0.17838400602340698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,64,0,1,fp8,fp8,0,0.1681706706682841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,128,1,float16,float16,0,0.1053653359413147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,0,1,float16,float16,0,0.10577066739400227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,128,1,float16,fp8,0,0.1042080024878184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,128,1,fp8,fp8,0,0.1067039966583252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,0,1,float16,fp8,0,0.10494400064150493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,128,1,float16,float16,0,0.09701866904894511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,64,0,1,fp8,fp8,0,0.10612799723943074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,0,1,float16,float16,0,0.09805333614349365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,128,1,float16,fp8,0,0.09771733482678731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,128,1,fp8,fp8,0,0.09018133083979289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,0,1,float16,fp8,0,0.09867733716964722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,64,0,1,fp8,fp8,0,0.09073066711425781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,128,1,float16,float16,0,0.09848533074061076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,0,1,float16,float16,0,0.09693333506584167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,128,1,float16,fp8,0,0.0969493289788564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,128,1,fp8,fp8,0,0.09098133444786072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,0,1,float16,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,64,0,1,fp8,fp8,0,0.09107200304667155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,128,1,float16,float16,0,0.097653329372406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,0,1,float16,float16,0,0.09795733292897542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,128,1,float16,fp8,0,0.09727999567985535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,128,1,fp8,fp8,0,0.09136000275611877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,0,1,float16,fp8,0,0.09769066174825032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,128,1,float16,float16,0,0.05936533212661743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,64,0,1,fp8,fp8,0,0.09103999535242717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,0,1,float16,float16,0,0.05857066810131073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,128,1,float16,fp8,0,0.05784533421198527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,128,1,fp8,fp8,0,0.057477335135142006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,0,1,float16,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,64,0,1,fp8,fp8,0,0.05813866853713989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,128,1,float16,float16,0,0.05729066828886668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,0,1,float16,float16,0,0.05782400071620941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,128,1,float16,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,128,1,fp8,fp8,0,0.05412266651789347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,0,1,float16,fp8,0,0.056320001681645714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,64,0,1,fp8,fp8,0,0.05271466573079427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,128,1,float16,float16,0,0.05607999861240387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,0,1,float16,float16,0,0.05681066711743673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,128,1,float16,fp8,0,0.056261335810025535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,128,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,0,1,float16,fp8,0,0.056234667698542275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,64,0,1,fp8,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,128,1,float16,float16,0,0.05690133571624756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,0,1,float16,float16,0,0.05625600119431814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,128,1,float16,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,128,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,0,1,float16,fp8,0,0.05810666580994924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,64,0,1,fp8,fp8,0,0.05349333087603251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,128,1,float16,float16,0,0.0376800000667572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,0,1,float16,float16,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,128,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,128,1,fp8,fp8,0,0.036805334190527596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,0,1,float16,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,64,0,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,0,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,128,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,128,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,0,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,64,0,1,fp8,fp8,0,0.0351946676770846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,128,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,0,1,float16,float16,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,128,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,128,1,fp8,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,0,1,float16,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,64,0,1,fp8,fp8,0,0.033717334270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,128,1,float16,float16,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,0,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,128,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,128,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,0,1,float16,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,64,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,128,1,float16,float16,0,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,128,1,float16,fp8,0,0.024885334074497223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,128,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,0,1,float16,float16,0,0.023845332364241283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,0,1,float16,fp8,0,0.0244159996509552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,64,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,128,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,0,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,0,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,64,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,0,1,float16,float16,0,0.02508266766866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,128,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,128,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,0,1,float16,fp8,0,0.02500266581773758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,64,0,1,fp8,fp8,0,0.02422400067249934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,0,1,float16,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,128,1,fp8,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,0,1,float16,float16,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,128,1,float16,fp8,0,0.019952000429232914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,128,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,0,1,float16,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,0,1,float16,float16,0,0.016154666741689045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,128,1,fp8,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,128,1,float16,float16,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,64,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,128,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,128,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,128,1,float16,float16,0,0.5113813479741415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,0,1,float16,float16,0,0.5106826623280843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,128,1,float16,fp8,0,0.5099733273188273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,128,1,fp8,fp8,0,0.474181334177653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,0,1,float16,fp8,0,0.5084480047225952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,64,0,1,fp8,fp8,0,0.4735039869944255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,128,1,float16,float16,0,0.5112853447596232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,0,1,float16,float16,0,0.5086986621220907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,128,1,float16,fp8,0,0.5075999895731608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,128,1,fp8,fp8,0,0.4795893430709839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,0,1,float16,fp8,0,0.5082346598307291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,64,0,1,fp8,fp8,0,0.47740264733632404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,128,1,float16,float16,0,0.5111786524454752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,0,1,float16,float16,0,0.5091520150502523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,128,1,float16,fp8,0,0.5103626648585001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,128,1,fp8,fp8,0,0.4816746711730957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,0,1,float16,fp8,0,0.5101226568222046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,64,0,1,fp8,fp8,0,0.478058656056722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,128,1,float16,float16,0,0.2738186717033386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,0,1,float16,float16,0,0.2734346588452657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,128,1,float16,fp8,0,0.27297600110371906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,128,1,fp8,fp8,0,0.264847993850708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,0,1,float16,fp8,0,0.2727359930674235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,64,0,1,fp8,fp8,0,0.26548800865809125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,128,1,float16,float16,0,0.26386133829752606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,0,1,float16,float16,0,0.2632053295771281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,128,1,float16,fp8,0,0.2614240050315857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,128,1,fp8,fp8,0,0.24473599592844644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,0,1,float16,fp8,0,0.26580266157786053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,64,0,1,fp8,fp8,0,0.24467732508977255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,128,1,float16,float16,0,0.2623786727587382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,0,1,float16,float16,0,0.2614346742630005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,128,1,fp8,fp8,0,0.24522666136423746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,128,1,float16,fp8,0,0.2640960017840068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,0,1,fp8,fp8,0,0.24481600522994995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,64,0,1,float16,fp8,0,0.2632799943288167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,128,1,float16,float16,0,0.2629599968592326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,128,1,fp8,fp8,0,0.24674133459726968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,128,1,float16,fp8,0,0.26209600766499835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,0,1,float16,float16,0,0.263754665851593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,128,1,float16,float16,0,0.14646400014559427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,0,1,float16,fp8,0,0.26183466116587323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,0,1,float16,float16,0,0.1479466656843821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,64,0,1,fp8,fp8,0,0.24653865893681845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,128,1,float16,fp8,0,0.1451413333415985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,128,1,fp8,fp8,0,0.14430933197339377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,0,1,float16,fp8,0,0.1453013320763906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,64,0,1,fp8,fp8,0,0.1439306636651357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,128,1,float16,float16,0,0.13801599542299905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,0,1,float16,float16,0,0.1399733324845632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,128,1,fp8,fp8,0,0.1288746694723765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,128,1,float16,fp8,0,0.13896000385284424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,0,1,float16,fp8,0,0.13961600263913473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,64,0,1,fp8,fp8,0,0.13078400492668152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,128,1,float16,float16,0,0.1381119986375173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,0,1,float16,float16,0,0.1402346690495809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,128,1,float16,fp8,0,0.13860799868901572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,128,1,fp8,fp8,0,0.12974933783213297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,0,1,float16,fp8,0,0.13886400063832602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,64,0,1,fp8,fp8,0,0.13009066383043924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,128,1,float16,float16,0,0.13993600010871887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,0,1,float16,float16,0,0.1401653289794922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,128,1,float16,fp8,0,0.13979732990264893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,128,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,0,1,float16,fp8,0,0.139765332142512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,64,0,1,fp8,fp8,0,0.12994133432706198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,128,1,float16,float16,0,0.07881600161393483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,0,1,float16,float16,0,0.0788373351097107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,128,1,float16,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,128,1,fp8,fp8,0,0.07635200023651123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,0,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,64,0,1,fp8,fp8,0,0.07751999795436859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,128,1,float16,float16,0,0.07671999931335449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,0,1,float16,float16,0,0.07667199770609538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,128,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,128,1,fp8,fp8,0,0.07293333113193512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,0,1,float16,fp8,0,0.0784693310658137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,64,0,1,fp8,fp8,0,0.07271466652552287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,128,1,float16,float16,0,0.07659199833869934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,128,1,float16,fp8,0,0.0774186650911967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,0,1,float16,float16,0,0.07826666533946991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,128,1,fp8,fp8,0,0.07226133346557617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,0,1,float16,fp8,0,0.07832533121109009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,64,0,1,fp8,fp8,0,0.0737066666285197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,128,1,float16,float16,0,0.07650133470694225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,0,1,float16,float16,0,0.07674133280913036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,128,1,float16,fp8,0,0.07794666786988576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,128,1,fp8,fp8,0,0.07283733288447063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,0,1,float16,fp8,0,0.07674133280913036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,128,1,float16,float16,0,0.04781866570313772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,64,0,1,fp8,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,128,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,128,1,fp8,fp8,0,0.04613866905371348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,0,1,float16,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,64,0,1,fp8,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,128,1,float16,float16,0,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,0,1,float16,float16,0,0.04620266457398733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,128,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,128,1,fp8,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,0,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,64,0,1,fp8,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,128,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,0,1,float16,float16,0,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,128,1,float16,fp8,0,0.045567999283472695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,128,1,fp8,fp8,0,0.04409066836039225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,0,1,float16,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,64,0,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,128,1,float16,float16,0,0.04574400186538696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,0,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,128,1,float16,fp8,0,0.045978665351867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,128,1,fp8,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,0,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,64,0,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,128,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,128,1,float16,fp8,0,0.030666666726271313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,0,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,64,0,1,fp8,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,128,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,128,1,float16,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,128,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,128,1,float16,float16,0,0.029605334003766377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,0,1,float16,float16,0,0.02979733298222224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,128,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,128,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,0,1,float16,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,64,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,128,1,float16,float16,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,0,1,float16,float16,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,128,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,128,1,fp8,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,64,0,1,fp8,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,128,1,float16,float16,0,0.022490667800108593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,128,1,float16,fp8,0,0.02256533255179723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,64,0,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,64,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,128,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,128,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,128,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,0,1,float16,fp8,0,0.022490667800108593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,64,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,128,1,float16,float16,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,128,1,float16,fp8,0,0.022357332209746044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,64,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,128,1,float16,float16,0,0.018687999496857326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,128,1,float16,float16,0,0.01793066660563151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,128,1,float16,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,128,1,fp8,fp8,0,0.018266666680574417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,0,1,float16,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,128,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,128,1,fp8,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,0,1,float16,float16,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,0,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,128,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,64,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,128,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,64,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,128,1,float16,float16,0,0.016517333686351776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,128,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,128,1,float16,float16,0,0.4384959936141968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,0,1,float16,float16,0,0.4370400110880534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,128,1,float16,fp8,0,0.4410773515701294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,128,1,fp8,fp8,0,0.40588800112406415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,0,1,float16,fp8,0,0.4396853446960449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,64,0,1,fp8,fp8,0,0.4053013324737549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,128,1,float16,float16,0,0.43839999039967853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,0,1,float16,float16,0,0.43958401679992676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,128,1,float16,fp8,0,0.43801601727803546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,128,1,fp8,fp8,0,0.4066026608149211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,0,1,float16,fp8,0,0.4382400115331014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,64,0,1,fp8,fp8,0,0.4084479808807373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,128,1,float16,float16,0,0.43769601980845135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,0,1,float16,float16,0,0.4389493465423584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,128,1,float16,fp8,0,0.4392746686935425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,128,1,fp8,fp8,0,0.4076586564381917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,0,1,float16,fp8,0,0.4373226563135783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,64,0,1,fp8,fp8,0,0.40995200475056964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,128,1,float16,float16,0,0.23437867561976114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,0,1,float16,float16,0,0.23438932498296103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,128,1,float16,fp8,0,0.23404266436894736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,128,1,fp8,fp8,0,0.22429867585500082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,0,1,float16,fp8,0,0.2344693342844645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,64,0,1,fp8,fp8,0,0.22457067171732584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,128,1,float16,float16,0,0.22870399554570517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,0,1,float16,float16,0,0.2279520034790039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,128,1,float16,fp8,0,0.22752533356348673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,128,1,fp8,fp8,0,0.21020267407099405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,0,1,float16,fp8,0,0.2283359964688619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,64,0,1,fp8,fp8,0,0.20959466695785522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,128,1,float16,float16,0,0.22684800624847412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,0,1,float16,float16,0,0.2274186611175537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,128,1,float16,fp8,0,0.22792534033457437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,128,1,fp8,fp8,0,0.2099519968032837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,0,1,float16,fp8,0,0.22623467445373535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,64,0,1,fp8,fp8,0,0.21023466189702353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,0,1,float16,float16,0,0.22853867212931314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,128,1,float16,float16,0,0.2286240061124166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,128,1,float16,fp8,0,0.22619199752807617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,128,1,fp8,fp8,0,0.21133333444595337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,128,1,float16,float16,0,0.1225440005461375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,0,1,float16,float16,0,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,0,1,float16,fp8,0,0.22751466433207193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,64,0,1,fp8,fp8,0,0.21074666579564413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,128,1,float16,fp8,0,0.12346667051315308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,128,1,fp8,fp8,0,0.11932800213495891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,0,1,float16,fp8,0,0.12376532951990764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,64,0,1,fp8,fp8,0,0.11738133430480957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,128,1,float16,float16,0,0.12151466806729634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,0,1,float16,float16,0,0.1220906674861908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,128,1,float16,fp8,0,0.12159466743469238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,128,1,fp8,fp8,0,0.11351999640464783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,0,1,float16,fp8,0,0.12185066938400269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,64,0,1,fp8,fp8,0,0.11339199542999268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,128,1,float16,float16,0,0.12020799517631531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,0,1,float16,float16,0,0.12090667088826497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,128,1,float16,fp8,0,0.12155200044314067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,128,1,fp8,fp8,0,0.11353066563606262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,0,1,float16,fp8,0,0.12090133627255757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,64,0,1,fp8,fp8,0,0.1132533351580302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,128,1,float16,float16,0,0.1202346682548523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,0,1,float16,float16,0,0.12196266651153564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,128,1,float16,fp8,0,0.12044266859690349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,128,1,fp8,fp8,0,0.11337600151697795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,0,1,float16,fp8,0,0.12075733145078023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,128,1,float16,float16,0,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,0,1,float16,float16,0,0.06844266752401988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,64,0,1,fp8,fp8,0,0.11384000380833943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,128,1,float16,fp8,0,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,128,1,fp8,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,0,1,float16,fp8,0,0.06870933373769124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,64,0,1,fp8,fp8,0,0.06582933167616527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,128,1,float16,float16,0,0.06817600131034851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,0,1,float16,float16,0,0.06845333178838094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,128,1,float16,fp8,0,0.06855999926726024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,128,1,fp8,fp8,0,0.0642986645301183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,0,1,float16,fp8,0,0.06824000179767609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,64,0,1,fp8,fp8,0,0.0641546646753947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,128,1,float16,float16,0,0.06855999926726024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,0,1,float16,float16,0,0.06839466591676076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,128,1,float16,fp8,0,0.067930668592453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,128,1,fp8,fp8,0,0.06422933439413707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,0,1,float16,fp8,0,0.06842133402824402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,64,0,1,fp8,fp8,0,0.06422933439413707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,128,1,float16,float16,0,0.06816533207893372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,0,1,float16,float16,0,0.0688266654809316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,128,1,float16,fp8,0,0.0681279997030894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,128,1,fp8,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,0,1,float16,fp8,0,0.06874133149782817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,64,0,1,fp8,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,128,1,float16,float16,0,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,0,1,float16,float16,0,0.043706665436426796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,128,1,float16,fp8,0,0.04268800218900045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,128,1,fp8,fp8,0,0.03957866628964742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,64,0,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,128,1,float16,float16,0,0.04166933397452036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,0,1,float16,float16,0,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,128,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,128,1,fp8,fp8,0,0.041333332657814026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,0,1,float16,fp8,0,0.041573333243529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,64,0,1,fp8,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,128,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,0,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,128,1,float16,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,128,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,64,0,1,fp8,fp8,0,0.04018666595220566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,128,1,float16,float16,0,0.041519999504089355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,128,1,float16,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,64,0,1,fp8,fp8,0,0.0395359992980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,128,1,float16,float16,0,0.02787200113137563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,0,1,float16,float16,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,128,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,128,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,0,1,float16,float16,0,0.028117333849271137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,128,1,float16,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,0,1,float16,fp8,0,0.02886933336655299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,64,0,1,fp8,fp8,0,0.027109332382678986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,128,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,0,1,float16,float16,0,0.028250666956106823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,128,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,128,1,fp8,fp8,0,0.027066667874654133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,0,1,float16,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,128,1,float16,float16,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,128,1,float16,fp8,0,0.029002666473388672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,128,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,0,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,64,0,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,128,1,float16,float16,0,0.0207893339296182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,64,0,1,fp8,fp8,0,0.019765333582957584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,128,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,0,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,0,1,float16,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,128,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,128,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,128,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,64,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,0,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,128,1,float16,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,128,1,fp8,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,128,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,64,0,1,fp8,fp8,0,0.018325333793958027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,128,1,float16,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,64,0,1,fp8,fp8,0,0.016586666305859882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,128,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,0,1,float16,fp8,0,0.01597333326935768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,0,1,float16,float16,0,0.019823999454577763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,128,1,fp8,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,0,0.37620266278584796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,0,0.3797920147577922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,0,0.3779840071996053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,128,1,fp8,fp8,0,0.3412373463312785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,0,0.3782293399175008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,64,0,1,fp8,fp8,0,0.3426986535390218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,0,0.37622400124867755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,0,0.3778666655222575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,0,0.37778667608896893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,128,1,fp8,fp8,0,0.3408000071843465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,0,0.3776053190231323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,64,0,1,fp8,fp8,0,0.34118398030598956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,0,0.3760853211085002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,0,0.37834131717681885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,0,0.37734933694203693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,128,1,fp8,fp8,0,0.3408799966176351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,0,1,fp8,fp8,0,0.34280534585316974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,0,0.37814398606618244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,128,1,float16,float16,0,0.19527999560038248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,0,1,float16,float16,0,0.19634666045506796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,128,1,float16,fp8,0,0.19716266791025797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,128,1,fp8,fp8,0,0.1786293387413025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,0,1,fp8,fp8,0,0.17701866229375204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,64,0,1,float16,fp8,0,0.19582400719324747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,0,0.1960373322168986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,0,0.19554134209950766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,0,0.1955839991569519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,0,0.19569067160288492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,128,1,fp8,fp8,0,0.17753599087397257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,64,0,1,fp8,fp8,0,0.1769226590792338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,0,0.19542932510375977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,0,0.19531200329462686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,0,0.19579199949900308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,128,1,fp8,fp8,0,0.1771413286526998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,0,0.19550933440526327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,64,0,1,fp8,fp8,0,0.1771786610285441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,0,0.19571733474731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,0,0.195360004901886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,0,0.19537067413330078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,128,1,fp8,fp8,0,0.17751999696095785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,0,0.19543999433517456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,64,0,1,fp8,fp8,0,0.1772800087928772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,128,1,float16,float16,0,0.10578133662541707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,0,1,float16,float16,0,0.10526399811108907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,128,1,float16,fp8,0,0.10544000069300334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,128,1,fp8,fp8,0,0.09726933638254802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,0,1,float16,fp8,0,0.10708266496658325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,64,0,1,fp8,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,0,0.10531199971834819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,0,0.10564266641934712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,128,1,fp8,fp8,0,0.09716799855232239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,0,0.10523200035095215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,64,0,1,fp8,fp8,0,0.09538132945696513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,0,0.10540800293286641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,0,0.1053706705570221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,0,0.10567999879519145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,128,1,fp8,fp8,0,0.09638399879137675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,0,0.10523733496665955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,64,0,1,fp8,fp8,0,0.09517866373062134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,0,0.10533866286277771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,0,0.10520533720652263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,0,0.10519466797510783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,128,1,fp8,fp8,0,0.09640533725420634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,0,0.1051626702149709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,64,0,1,fp8,fp8,0,0.09713066617647807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,0,1,float16,float16,0,0.060266668597857155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,128,1,float16,float16,0,0.06071466704209646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,128,1,fp8,fp8,0,0.05586666862169901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,0,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,64,0,1,fp8,fp8,0,0.055829331278800964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,0,0.05994133154551188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,128,1,fp8,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,0,0.06224533418814341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,64,0,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,0,0.06160533428192139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,0,0.06033066908518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,128,1,fp8,fp8,0,0.056458666920661926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,64,0,1,fp8,fp8,0,0.05585599939028422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,0,0.061434666315714516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,0,0.0613919993241628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,0,0.060693333546320595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,128,1,fp8,fp8,0,0.05590933561325073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,0,0.062080000837643944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,64,0,1,fp8,fp8,0,0.05594133337338766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,0,1,float16,float16,0,0.03759466608365377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,128,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,128,1,fp8,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,0,1,float16,fp8,0,0.03760000069936117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,64,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,128,1,fp8,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,64,0,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,0,0.03812800099452337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,128,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,64,0,1,fp8,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,0,0.03925866633653641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,0,0.0391839991013209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,0,0.03736533224582672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,128,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,0,0.039359999199708305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,64,0,1,fp8,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,0,1,float16,float16,0,0.02584533393383026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,128,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,0,0.027072000006834667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,0,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,128,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,0,0.026149332523345947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,0,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,64,0,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,128,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,0,0.026741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,64,0,1,fp8,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,128,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,128,1,fp8,fp8,0,0.02035733312368393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,128,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,0,0.01989866668979327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,128,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,0,1,fp8,fp8,0,0.020442667106787365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,64,128,1,fp8,fp8,0,0.019760000209013622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,128,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,128,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,64,0,1,fp8,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,64,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,0,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,0,0.018181333939234417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,0,0.017808000246683758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,128,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,0,1,float16,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,0,0.015967999895413715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,64,128,1,fp8,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,64,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,float16,0,3.1642026901245117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,128,1,float16,fp8,0,3.1896212895711265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,128,1,fp8,fp8,0,2.9763358434041343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,float16,0,3.1961708068847656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,float16,0,16.893530527750652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,0,1,fp8,fp8,0,14.58474604288737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,128,1,float16,fp8,0,3.2213331858317056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,64,0,1,float16,fp8,0,16.88760503133138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,128,1,fp8,fp8,0,3.0148960749308267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,float16,0,3.2140000661214194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,float16,0,16.928367614746094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,128,1,float16,fp8,0,3.2415040334065757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,128,1,fp8,fp8,0,3.040053367614746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,0,1,fp8,fp8,0,14.59185536702474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,64,0,1,float16,fp8,0,16.932479858398438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,float16,0,3.2396532694498696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,float16,0,16.93930180867513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,128,1,float16,fp8,0,3.2705119450887046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,128,1,fp8,fp8,0,3.072725296020508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,0,1,fp8,fp8,0,14.619162241617838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,64,0,1,float16,fp8,0,16.99126434326172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,float16,0,1.8786452611287434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,float16,0,17.031487782796223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,128,1,float16,fp8,0,1.9260160128275554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,float16,0,8.869562784830729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,128,1,fp8,fp8,0,1.8386400540669758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,0,1,fp8,fp8,0,14.676021575927734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,float16,0,1.6538346608479817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,64,0,1,float16,fp8,0,17.007605234781902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,128,1,float16,fp8,0,1.670805295308431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,128,1,fp8,fp8,0,1.5584905942281086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,0,1,fp8,fp8,0,7.700997034708659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,64,0,1,float16,fp8,0,8.927775700887045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,float16,0,1.654047966003418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,float16,0,8.562927881876627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,128,1,float16,fp8,0,1.6734347343444824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,0,1,fp8,fp8,0,7.397653579711914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,128,1,fp8,fp8,0,1.5647145907084148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,64,0,1,float16,fp8,0,8.549253463745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,float16,0,1.6652053197224934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,float16,0,8.554362614949545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,128,1,float16,fp8,0,1.6767946879069011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,0,1,fp8,fp8,0,7.390037536621094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,128,1,fp8,fp8,0,1.573520024617513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,64,0,1,float16,fp8,0,8.578255971272787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,float16,0,1.6758559544881184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,float16,0,8.557882944742838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,0,1,fp8,fp8,0,7.435871760050456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,64,0,1,float16,fp8,0,8.594170888264975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,128,1,float16,fp8,0,1.6979893048604329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,128,1,fp8,fp8,0,1.5899146397908528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,float16,0,8.584933598836264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,float16,0,1.0317973295847576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,128,1,float16,fp8,0,1.0586986541748047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,128,1,fp8,fp8,0,1.0205439726511638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,float16,0,4.561781247456868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,0,1,fp8,fp8,0,7.433194478352864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,64,0,1,float16,fp8,0,8.624618530273438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,float16,0,0.927296002705892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,0,1,float16,fp8,0,4.5973920822143555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,128,1,float16,fp8,0,0.9348159631093343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,128,1,fp8,fp8,0,0.8800053596496582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,64,0,1,fp8,fp8,0,3.9833014806111655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,float16,0,0.9270133177439371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,float16,0,4.4154612223307295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,128,1,float16,fp8,0,0.9383412996927897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,0,1,fp8,fp8,0,3.8293174107869468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,64,0,1,float16,fp8,0,4.4269866943359375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,128,1,fp8,fp8,0,0.8843466440836588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,float16,0,0.9327893257141113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,float16,0,4.421674728393555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,128,1,float16,fp8,0,0.9395786921183268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,0,1,fp8,fp8,0,3.8408053716023765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,128,1,fp8,fp8,0,0.8876319726308187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,64,0,1,float16,fp8,0,4.432613372802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,float16,0,0.9385866324106852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,float16,0,4.424160003662109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,128,1,float16,fp8,0,0.9480213324228922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,0,1,fp8,fp8,0,3.839776039123535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,64,0,1,float16,fp8,0,4.438672065734863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,128,1,fp8,fp8,0,0.8964213530222574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,float16,0,0.7221279939015707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,float16,0,4.441280047098796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,128,1,float16,fp8,0,0.720965305964152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,128,1,fp8,fp8,0,0.6845920085906982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,float16,0,2.5292320251464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,0,1,fp8,fp8,0,3.853952089945475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,64,0,1,float16,fp8,0,4.442634582519531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,float16,0,0.7187146345774332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,128,1,float16,fp8,0,0.7200213273366293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,0,1,fp8,fp8,0,2.2088054021199546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,64,0,1,float16,fp8,0,2.524874687194824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,128,1,fp8,fp8,0,0.6851946512858073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,float16,0,2.498981316884359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,float16,0,0.7185760339101156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,0,1,fp8,fp8,0,2.1954293251037598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,128,1,float16,fp8,0,0.7198186715443929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,64,0,1,float16,fp8,0,2.5085280736287436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,float16,0,2.4983572959899902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,128,1,fp8,fp8,0,0.6837866306304932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,float16,0,0.7201759815216064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,0,1,fp8,fp8,0,2.2022933959960938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,128,1,float16,fp8,0,0.7213226954142252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,64,0,1,float16,fp8,0,2.506394704182943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,128,1,fp8,fp8,0,0.6875840028127035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,float16,0,2.5022239685058594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,float16,0,0.7216533025105795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,0,1,fp8,fp8,0,2.202810605367025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,64,0,1,float16,fp8,0,2.502432028452555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,128,1,float16,fp8,0,0.721839984258016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,128,1,fp8,fp8,0,0.6868106524149576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,float16,0,2.5048853556315103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,0,1,fp8,fp8,0,2.201296011606852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,64,0,1,float16,fp8,0,2.5062880516052246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,float16,0,2.357327938079834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,128,1,fp8,fp8,0,2.2064266204833984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,128,1,float16,fp8,0,2.376688003540039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,float16,0,2.3637173970540366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,float16,0,10.027450561523438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,0,1,float16,fp8,0,10.013877232869467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,128,1,float16,fp8,0,2.3856746355692544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,64,0,1,fp8,fp8,0,8.662031809488932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,128,1,fp8,fp8,0,2.227253278096517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,float16,0,10.018058776855469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,float16,0,2.3754666646321616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,128,1,float16,fp8,0,2.3986560503641763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,0,1,fp8,fp8,0,8.680416107177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,64,0,1,float16,fp8,0,10.042762756347656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,128,1,fp8,fp8,0,2.2446773846944175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,float16,0,2.3954346974690757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,float16,0,10.05190912882487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,128,1,float16,fp8,0,2.423130671183268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,0,1,fp8,fp8,0,8.711008071899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,64,0,1,float16,fp8,0,10.069791793823242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,128,1,fp8,fp8,0,2.2709439595540366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,float16,0,1.408880074818929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,float16,0,10.065701166788736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,128,1,float16,fp8,0,1.4449067115783691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,128,1,fp8,fp8,0,1.3784054120381672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,float16,0,5.330735842386882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,0,1,fp8,fp8,0,8.727530797322592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,64,0,1,float16,fp8,0,10.109941482543945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,float16,0,1.2429227034250896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,128,1,float16,fp8,0,1.2543466885884602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,0,1,fp8,fp8,0,4.657626787821452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,64,0,1,float16,fp8,0,5.367690404256185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,128,1,fp8,fp8,0,1.1722026666005452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,float16,0,5.106032053629558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,float16,0,1.2467520236968994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,0,1,fp8,fp8,0,4.428714752197266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,64,0,1,float16,fp8,0,5.1135679880778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,128,1,float16,fp8,0,1.2587040265401204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,128,1,fp8,fp8,0,1.1782026290893555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,float16,0,5.104912122090657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,float16,0,1.2494293053944905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,128,1,float16,fp8,0,1.2639093399047852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,0,1,fp8,fp8,0,4.438213348388672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,64,0,1,float16,fp8,0,5.106906572977702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,128,1,fp8,fp8,0,1.1833866437276204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,float16,0,1.2597546577453613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,float16,0,5.121402740478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,128,1,float16,fp8,0,1.2742559909820557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,0,1,fp8,fp8,0,4.439050674438477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,64,0,1,float16,fp8,0,5.131925264994304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,128,1,fp8,fp8,0,1.196826696395874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,float16,0,0.778048038482666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,float16,0,5.133370717366536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,128,1,float16,fp8,0,0.7999040285746256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,0,1,fp8,fp8,0,4.45908260345459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,128,1,fp8,fp8,0,0.7713973522186279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,float16,0,2.77073605855306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,64,0,1,float16,fp8,0,5.141008059183757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,float16,0,0.7000479698181152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,128,1,float16,fp8,0,0.7051626841227213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,0,1,fp8,fp8,0,2.439157327016195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,64,0,1,float16,fp8,0,2.7907253901163735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,128,1,fp8,fp8,0,0.6655679941177368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,float16,0,2.650949319203695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,float16,0,0.7013546625773112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,128,1,float16,fp8,0,0.7075200080871582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,0,1,fp8,fp8,0,2.322373390197754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,64,0,1,float16,fp8,0,2.660325368245443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,128,1,fp8,fp8,0,0.6688053607940674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,float16,0,2.6560373306274414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,float16,0,0.7043680349985758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,128,1,float16,fp8,0,0.7104799747467041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,0,1,fp8,fp8,0,2.3261067072550454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,64,0,1,float16,fp8,0,2.692255973815918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,128,1,fp8,fp8,0,0.6724159717559814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,float16,0,2.66105063756307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,float16,0,0.7098026275634766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,0,1,fp8,fp8,0,2.3353919982910156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,64,0,1,float16,fp8,0,2.674448013305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,128,1,float16,fp8,0,0.7174719969431559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,128,1,fp8,fp8,0,0.6770293712615967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,float16,0,2.6700159708658853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,float16,0,0.5470879872639974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,128,1,float16,fp8,0,0.5472000042597452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,0,1,fp8,fp8,0,2.3337546984354653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,float16,0,1.5681653022766113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,64,0,1,float16,fp8,0,2.6805760065714517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,128,1,fp8,fp8,0,0.5228533347447714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,float16,0,0.544271985689799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,0,1,float16,fp8,0,1.569167931874593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,128,1,float16,fp8,0,0.5468693176905314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,64,0,1,fp8,fp8,0,1.3852532704671223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,float16,0,1.5536053975423176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,128,1,fp8,fp8,0,0.5175786813100179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,float16,0,0.547264019648234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,0,1,float16,fp8,0,1.5530506769816081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,64,0,1,fp8,fp8,0,1.3780266443888347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,128,1,float16,fp8,0,0.5478453238805135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,float16,0,1.5498560269673665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,128,1,fp8,fp8,0,0.5205013354619344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,float16,0,0.5435733397801717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,0,1,fp8,fp8,0,1.3761760393778484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,64,0,1,float16,fp8,0,1.5554399490356445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,128,1,float16,fp8,0,0.5441120068232218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,float16,0,1.5567199389139812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,128,1,fp8,fp8,0,0.5213973522186279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,float16,0,0.5459839900334676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,0,1,float16,fp8,0,1.5543732643127441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,64,0,1,fp8,fp8,0,1.374085267384847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,128,1,float16,fp8,0,0.5460799932479858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,float16,0,1.557653268178304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,128,1,fp8,fp8,0,0.5226613283157349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,0,1,float16,fp8,0,1.554682731628418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,64,0,1,fp8,fp8,0,1.3776319821675618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,float16,0,1.9584479331970215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,128,1,float16,fp8,0,1.977786699930827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,128,1,fp8,fp8,0,1.835408051808675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,float16,0,1.9636693000793457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,float16,0,7.22218132019043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,0,1,float16,fp8,0,7.254261016845703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,128,1,float16,fp8,0,1.983242670694987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,64,0,1,fp8,fp8,0,6.27619743347168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,128,1,fp8,fp8,0,1.8475200335184734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,float16,0,7.264810562133789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,float16,0,1.9724960327148438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,0,1,fp8,fp8,0,6.288527806599935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,128,1,float16,fp8,0,1.9939093589782715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,64,0,1,float16,fp8,0,7.253434499104817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,128,1,fp8,fp8,0,1.859498659769694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,float16,0,7.2656904856363935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,float16,0,1.9881919225056965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,128,1,float16,fp8,0,2.011103947957357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,0,1,fp8,fp8,0,6.303669611612956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,64,0,1,float16,fp8,0,7.283077239990234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,128,1,fp8,fp8,0,1.88265593846639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,float16,0,7.285781224568685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,float16,0,1.1740106741587322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,0,1,float16,fp8,0,7.320949554443359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,128,1,float16,fp8,0,1.2064320246378581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,64,0,1,fp8,fp8,0,6.330111821492513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,128,1,fp8,fp8,0,1.1518239974975586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,float16,0,3.8918399810791016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,float16,0,1.0380266507466633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,128,1,float16,fp8,0,1.047210693359375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,0,1,fp8,fp8,0,3.4170398712158203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,64,0,1,float16,fp8,0,3.9167893727620444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,128,1,fp8,fp8,0,0.979535977045695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,float16,0,3.7037226359049478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,float16,0,1.0392320156097412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,128,1,float16,fp8,0,1.0503733158111572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,0,1,fp8,fp8,0,3.2243359883626304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,64,0,1,float16,fp8,0,3.7039359410603843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,128,1,fp8,fp8,0,0.9839999675750732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,float16,0,3.7055625915527344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,float16,0,1.0453813076019287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,128,1,float16,fp8,0,1.0565173625946045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,0,1,fp8,fp8,0,3.227967898050944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,64,0,1,float16,fp8,0,3.708431879679362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,128,1,fp8,fp8,0,0.9903679688771566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,float16,0,3.714458783467611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,float16,0,1.052186648050944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,0,1,fp8,fp8,0,3.234773317972819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,128,1,float16,fp8,0,1.0645493666330974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,64,0,1,float16,fp8,0,3.719776153564453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,128,1,fp8,fp8,0,1.0011573632558186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,float16,0,0.6508693297704061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,float16,0,3.7325334548950195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,128,1,float16,fp8,0,0.6689866383870443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,0,1,float16,fp8,0,3.743061383565267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,64,0,1,fp8,fp8,0,3.247573216756185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,128,1,fp8,fp8,0,0.6450506846110026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,float16,0,2.0336267153422036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,float16,0,0.5843573411305746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,128,1,float16,fp8,0,0.5904320081075033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,0,1,fp8,fp8,0,1.8014987309773762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,64,0,1,float16,fp8,0,2.0531999270121255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,128,1,fp8,fp8,0,0.5580693483352661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,float16,0,1.9383412996927898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,float16,0,0.5867520173390707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,0,1,fp8,fp8,0,1.7041813532511394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,128,1,float16,fp8,0,0.5925173362096151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,64,0,1,float16,fp8,0,1.9479947090148926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,float16,0,1.9437813758850098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,128,1,fp8,fp8,0,0.5597226619720459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,float16,0,0.5893866618474325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,0,1,fp8,fp8,0,1.7045706113179524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,64,0,1,float16,fp8,0,1.9502986272176106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,128,1,float16,fp8,0,0.5946826537450155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,float16,0,1.9492640495300293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,128,1,fp8,fp8,0,0.5627573331197103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,float16,0,0.5920586585998535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,0,1,fp8,fp8,0,1.7095306714375813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,64,0,1,float16,fp8,0,1.9559040069580078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,128,1,float16,fp8,0,0.5989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,128,1,fp8,fp8,0,0.5685919920603434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,float16,0,1.9541385968526204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,float16,0,0.45532798767089844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,128,1,float16,fp8,0,0.45579198996225995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,0,1,float16,fp8,0,1.9630239804585774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,64,0,1,fp8,fp8,0,1.7164853413899739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,float16,0,1.1760319868723552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,128,1,fp8,fp8,0,0.43588801225026447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,float16,0,0.45708799362182617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,0,1,fp8,fp8,0,1.0393493175506592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,64,0,1,float16,fp8,0,1.1757919788360596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,128,1,float16,fp8,0,0.45844801266988117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,128,1,fp8,fp8,0,0.4351786772410075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,float16,0,1.1573440233866374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,float16,0,0.45602667331695557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,0,1,float16,fp8,0,1.1616533597310383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,64,0,1,fp8,fp8,0,1.03438401222229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,128,1,float16,fp8,0,0.4569973150889079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,float16,0,1.156704028447469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,128,1,fp8,fp8,0,0.43509332338968915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,float16,0,0.4543999830881755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,0,1,float16,fp8,0,1.1630880037943523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,64,0,1,fp8,fp8,0,1.0329439640045166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,128,1,float16,fp8,0,0.45333866278330487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,float16,0,1.1622239748636882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,128,1,fp8,fp8,0,0.43691198031107586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,float16,0,0.4564906756083171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,0,1,float16,fp8,0,1.1639359792073567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,64,0,1,fp8,fp8,0,1.0297760168711345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,128,1,float16,fp8,0,0.45577601591746014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,float16,0,1.164469321568807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,128,1,fp8,fp8,0,0.43619732062021893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,0,1,float16,fp8,0,1.161685307820638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,64,0,1,fp8,fp8,0,1.0332746505737305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,float16,0,3.072175979614258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,128,1,fp8,fp8,0,2.8809385299682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,128,1,float16,fp8,0,3.094570795694987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,float16,0,3.1014506022135415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,float16,0,9.705120086669922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,0,1,fp8,fp8,0,8.463877360026041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,64,0,1,float16,fp8,0,9.727888107299805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,128,1,float16,fp8,0,3.129221280415853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,128,1,fp8,fp8,0,2.923434575398763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,float16,0,9.746933619181315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,float16,0,3.125311851501465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,128,1,float16,fp8,0,3.1524906158447266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,0,1,fp8,fp8,0,8.50540288289388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,64,0,1,float16,fp8,0,9.758127848307291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,128,1,fp8,fp8,0,2.951173464457194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,float16,0,9.79086430867513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,float16,0,3.1544745763142905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,0,1,fp8,fp8,0,8.517173131306967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,128,1,float16,fp8,0,3.1813653310139975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,64,0,1,float16,fp8,0,9.81432024637858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,128,1,fp8,fp8,0,2.9758825302124023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,float16,0,1.7923572858174641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,float16,0,9.828959782918295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,128,1,float16,fp8,0,1.8356906572977703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,0,1,fp8,fp8,0,8.567914962768555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,128,1,fp8,fp8,0,1.7484639485677083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,64,0,1,float16,fp8,0,9.850847880045572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,float16,0,5.203376134236653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,float16,0,1.5667413075764973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,128,1,float16,fp8,0,1.581925392150879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,0,1,float16,fp8,0,5.255818684895833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,64,0,1,fp8,fp8,0,4.59928544362386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,float16,0,4.903813362121582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,128,1,fp8,fp8,0,1.4712692896525066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,float16,0,1.569034735361735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,128,1,float16,fp8,0,1.5855894088745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,0,1,fp8,fp8,0,4.278629302978516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,64,0,1,float16,fp8,0,4.934805234273274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,float16,0,4.907274564107259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,128,1,fp8,fp8,0,1.4775840441385906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,float16,0,1.5781280199686687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,0,1,fp8,fp8,0,4.286783854166667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,64,0,1,float16,fp8,0,4.929658571879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,128,1,float16,fp8,0,1.5950613021850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,float16,0,4.925802548726399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,128,1,fp8,fp8,0,1.4872533480326335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,float16,0,1.5896213849385579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,0,1,fp8,fp8,0,4.2914988199869795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,64,0,1,float16,fp8,0,4.9491628011067705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,128,1,float16,fp8,0,1.607850710550944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,128,1,fp8,fp8,0,1.5038240750630696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,float16,0,4.951893488566081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,float16,0,0.9424853324890137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,128,1,float16,fp8,0,0.9680480162302653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,float16,0,2.670106569925944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,0,1,fp8,fp8,0,4.310122807820638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,64,0,1,float16,fp8,0,4.969024022420247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,128,1,fp8,fp8,0,0.9261759916941324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,float16,0,0.8302506605784098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,128,1,float16,fp8,0,0.8405173619588217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,0,1,fp8,fp8,0,2.364485263824463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,64,0,1,float16,fp8,0,2.695333480834961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,float16,0,2.5185440381368003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,128,1,fp8,fp8,0,0.7874133586883545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,float16,0,0.8363200028737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,0,1,fp8,fp8,0,2.2098827362060547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,64,0,1,float16,fp8,0,2.530501365661621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,128,1,float16,fp8,0,0.84334397315979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,128,1,fp8,fp8,0,0.7918079694112142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,float16,0,2.5252960522969565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,float16,0,0.8404853343963623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,0,1,fp8,fp8,0,2.2145867347717285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,64,0,1,float16,fp8,0,2.536522706349691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,128,1,float16,fp8,0,0.848304033279419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,128,1,fp8,fp8,0,0.7970666885375977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,float16,0,2.534261385599772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,float16,0,0.8466453552246094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,0,1,float16,fp8,0,2.54477326075236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,64,0,1,fp8,fp8,0,2.2192586263020835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,128,1,float16,fp8,0,0.856058677037557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,float16,0,2.5491092999776206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,128,1,fp8,fp8,0,0.8048160076141357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,float16,0,0.5232479969660441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,128,1,float16,fp8,0,0.5385119915008545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,float16,0,1.4078559875488281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,0,1,float16,fp8,0,2.557578722635905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,64,0,1,fp8,fp8,0,2.2301066716512046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,128,1,fp8,fp8,0,0.519813338915507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,float16,0,0.47140800952911377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,0,1,fp8,fp8,0,1.2607519626617432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,64,0,1,float16,fp8,0,1.4216960271199544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,128,1,float16,fp8,0,0.4745013316472371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,float16,0,1.3407999674479167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,128,1,fp8,fp8,0,0.45085867245992023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,float16,0,0.47217599550882977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,0,1,float16,fp8,0,1.3389013608296711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,64,0,1,fp8,fp8,0,1.180176019668579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,128,1,float16,fp8,0,0.4758933385213216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,float16,0,1.3394826253255208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,128,1,fp8,fp8,0,0.45178667704264325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,float16,0,0.47436265150705975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,0,1,float16,fp8,0,1.3438773155212402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,64,0,1,fp8,fp8,0,1.1815733114878337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,128,1,float16,fp8,0,0.4793173472086589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,float16,0,1.3416479428609211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,128,1,fp8,fp8,0,0.4540693362553914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,float16,0,0.4778453509012858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,0,1,float16,fp8,0,1.3506080309549968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,64,0,1,fp8,fp8,0,1.183685302734375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,128,1,float16,fp8,0,0.4822719891866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,128,1,fp8,fp8,0,0.4575626850128174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,float16,0,1.3492107391357422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,float16,0,0.37189332644144696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,0,1,float16,fp8,0,1.3528587023417156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,64,0,1,fp8,fp8,0,1.1875999768575032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,128,1,float16,fp8,0,0.37170668443044025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,128,1,fp8,fp8,0,0.352944016456604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,float16,0,0.8355573018391927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,float16,0,0.37010665734608966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,0,1,float16,fp8,0,0.8303840160369873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,64,0,1,fp8,fp8,0,0.7409813404083252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,128,1,float16,fp8,0,0.3695360024770101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,128,1,fp8,fp8,0,0.35173332691192627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,float16,0,0.8198080062866211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,0,1,float16,fp8,0,0.822271982828776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,64,0,1,fp8,fp8,0,0.7342186768849691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,float16,0,0.3694933255513509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,128,1,float16,fp8,0,0.36737600962320965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,128,1,fp8,fp8,0,0.3535093466440837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,float16,0,0.8207840124766032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,float16,0,0.36897599697113037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,0,1,float16,fp8,0,0.8187200228373209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,64,0,1,fp8,fp8,0,0.7337546348571777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,128,1,float16,fp8,0,0.37039466698964435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,float16,0,0.8218186696370443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,128,1,fp8,fp8,0,0.35094932715098065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,float16,0,0.37032000223795575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,0,1,float16,fp8,0,0.8216959635416666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,64,0,1,fp8,fp8,0,0.7372639973958334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,128,1,float16,fp8,0,0.3716799815495809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,float16,0,0.820576032002767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,128,1,fp8,fp8,0,0.3521973292032878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,0,1,float16,fp8,0,0.8252480030059814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,64,0,1,fp8,fp8,0,0.736080010732015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,float16,0,2.2839199701944985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,128,1,fp8,fp8,0,2.1319252649943032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,128,1,float16,fp8,0,2.309077262878418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,float16,0,2.2956639925638833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,float16,0,5.932191848754883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,0,1,fp8,fp8,0,5.185370763142903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,64,0,1,float16,fp8,0,5.952218373616536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,128,1,float16,fp8,0,2.3157599767049155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,128,1,fp8,fp8,0,2.1572426160176597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,float16,0,5.937658945719401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,float16,0,2.3078667322794595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,0,1,fp8,fp8,0,5.210122744242351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,64,0,1,float16,fp8,0,5.9661865234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,128,1,float16,fp8,0,2.330458641052246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,128,1,fp8,fp8,0,2.1729493141174316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,float16,0,5.961525599161784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,float16,0,2.3260480562845864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,0,1,float16,fp8,0,5.995925267537435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,64,0,1,fp8,fp8,0,5.227919896443685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,128,1,float16,fp8,0,2.3490293820699057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,128,1,fp8,fp8,0,2.1983680725097656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,float16,0,5.994202931722005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,float16,0,1.3448692957560222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,128,1,float16,fp8,0,1.378346602121989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,0,1,fp8,fp8,0,5.258495966593425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,64,0,1,float16,fp8,0,6.0247147878011065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,128,1,fp8,fp8,0,1.310693343480428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,float16,0,3.2418667475382485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,float16,0,1.1767626603444417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,0,1,float16,fp8,0,3.27837340037028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,64,0,1,fp8,fp8,0,2.8893438975016275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,128,1,float16,fp8,0,1.187557299931844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,float16,0,3.0107625325520835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,128,1,fp8,fp8,0,1.1060106754302979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,float16,0,1.1815413633982341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,0,1,fp8,fp8,0,2.6423999468485513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,64,0,1,float16,fp8,0,3.026128133138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,128,1,float16,fp8,0,1.1932960351308186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,float16,0,3.0209760665893555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,128,1,fp8,fp8,0,1.1098986466725667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,float16,0,1.186079978942871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,0,1,float16,fp8,0,3.032069206237793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,64,0,1,fp8,fp8,0,2.649850686391195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,128,1,float16,fp8,0,1.1980693340301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,float16,0,3.026437441507975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,128,1,fp8,fp8,0,1.115242640177409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,float16,0,1.1957706610361736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,0,1,float16,fp8,0,3.040581385294596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,64,0,1,fp8,fp8,0,2.659279982248942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,128,1,float16,fp8,0,1.2093226909637451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,float16,0,3.047327995300293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,128,1,fp8,fp8,0,1.1306880315144856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,float16,0,0.7096800009409586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,128,1,float16,fp8,0,0.7283039887746176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,float16,0,1.6753865877787273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,0,1,float16,fp8,0,3.0618934631347656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,64,0,1,fp8,fp8,0,2.6710987091064453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,128,1,fp8,fp8,0,0.696890672047933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,float16,0,0.6292213201522827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,0,1,fp8,fp8,0,1.498970667521159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,64,0,1,float16,fp8,0,1.6962614059448242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,128,1,float16,fp8,0,0.6345013380050659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,float16,0,1.5623092651367188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,128,1,fp8,fp8,0,0.5947893460591634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,float16,0,0.6314773162206014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,0,1,float16,fp8,0,1.565615971883138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,64,0,1,fp8,fp8,0,1.3790879249572754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,128,1,float16,fp8,0,0.6381920178731283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,128,1,fp8,fp8,0,0.5991146564483643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,float16,0,1.5638133684794109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,0,1,fp8,fp8,0,1.3827892939249675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,64,0,1,float16,fp8,0,1.5712800025939941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,float16,0,0.6348426739374796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,128,1,float16,fp8,0,0.6409279902776083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,128,1,fp8,fp8,0,0.6029866536458334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,float16,0,1.5704320271809895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,0,1,fp8,fp8,0,1.3869387308756511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,float16,0,0.6381440162658691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,64,0,1,float16,fp8,0,1.5758825937906902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,128,1,fp8,fp8,0,0.607744018236796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,128,1,float16,fp8,0,0.6468533277511597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,float16,0,0.3993706703186035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,float16,0,1.5819679896036785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,0,1,float16,fp8,0,1.585210641225179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,128,1,float16,fp8,0,0.4108853340148926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,64,0,1,fp8,fp8,0,1.3937493960062664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,128,1,fp8,fp8,0,0.39417600631713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,float16,0,0.8967466354370117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,float16,0,0.35785067081451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,0,1,fp8,fp8,0,0.8080586592356364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,64,0,1,float16,fp8,0,0.9079466660817465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,128,1,float16,fp8,0,0.3611573378245036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,float16,0,0.8399733702341715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,128,1,fp8,fp8,0,0.3428586721420288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,float16,0,0.35966400305430096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,0,1,float16,fp8,0,0.8430293401082357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,64,0,1,fp8,fp8,0,0.7468907038370768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,float16,0,0.8393386999766032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,128,1,float16,fp8,0,0.3619840145111084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,128,1,fp8,fp8,0,0.34409066041310626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,float16,0,0.3608106772104899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,0,1,float16,fp8,0,0.8424692948659261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,64,0,1,fp8,fp8,0,0.7488106886545817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,float16,0,0.8414933681488037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,128,1,fp8,fp8,0,0.34779198964436847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,128,1,float16,fp8,0,0.36441067854563397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,float16,0,0.3636853297551473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,0,1,float16,fp8,0,0.8478133678436279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,64,0,1,fp8,fp8,0,0.7523840268452963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,float16,0,0.8490293025970459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,128,1,float16,fp8,0,0.36807998021443683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,128,1,fp8,fp8,0,0.3492586612701416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,float16,0,0.28313066562016803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,0,1,float16,fp8,0,0.8534986972808838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,float16,0,0.54585067431132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,64,0,1,fp8,fp8,0,0.7563786506652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,128,1,float16,fp8,0,0.28356266021728516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,128,1,fp8,fp8,0,0.26980799436569214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,float16,0,0.2813120086987813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,0,1,float16,fp8,0,0.5458346605300903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,64,0,1,fp8,fp8,0,0.4931199947992961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,128,1,float16,fp8,0,0.2821919918060303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,float16,0,0.5339200099309286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,128,1,fp8,fp8,0,0.26934399207433063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,0,1,float16,fp8,0,0.5368906656901041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,float16,0,0.280239999294281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,64,0,1,fp8,fp8,0,0.484554648399353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,float16,0,0.5358826716740926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,128,1,fp8,fp8,0,0.2686400016148885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,128,1,float16,fp8,0,0.2826133370399475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,0,1,float16,fp8,0,0.5355199972788492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,float16,0,0.28144532442092896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,64,0,1,fp8,fp8,0,0.48369598388671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,float16,0,0.5373760064442953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,128,1,float16,fp8,0,0.2804853320121765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,128,1,fp8,fp8,0,0.2693173289299011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,0,1,float16,fp8,0,0.5379466613133749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,float16,0,0.27988799413045246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,64,0,1,fp8,fp8,0,0.48479998111724854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,float16,0,0.5378026564915975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,128,1,float16,fp8,0,0.28139734268188477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,128,1,fp8,fp8,0,0.2694773276646932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,0,1,float16,fp8,0,0.5397706826527914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,64,0,1,fp8,fp8,0,0.4859786828358968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,float16,0,3.0136000315348306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,128,1,fp8,fp8,0,2.818277359008789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,128,1,float16,fp8,0,3.0417601267496743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,float16,0,6.060431798299153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,float16,0,3.0500160853068032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,0,1,fp8,fp8,0,5.3617814381917315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,64,0,1,float16,fp8,0,6.090661366780599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,128,1,float16,fp8,0,3.074549357096354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,128,1,fp8,fp8,0,2.863493283589681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,float16,0,6.096506754557292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,float16,0,3.066901206970215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,0,1,float16,fp8,0,6.128853480021159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,64,0,1,fp8,fp8,0,5.4073225657145185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,float16,0,6.133455912272136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,128,1,fp8,fp8,0,2.888517379760742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,128,1,float16,fp8,0,3.093658765157064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,float16,0,3.097994804382324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,0,1,float16,fp8,0,6.150906880696614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,64,0,1,fp8,fp8,0,5.423824310302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,128,1,float16,fp8,0,3.1254186630249023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,128,1,fp8,fp8,0,2.9180692036946616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,float16,0,6.178005218505859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,float16,0,1.7447466850280762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,0,1,fp8,fp8,0,5.472485224405925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,128,1,float16,fp8,0,1.7815039952596028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,float16,0,3.3837865193684897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,64,0,1,float16,fp8,0,6.204048156738281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,128,1,fp8,fp8,0,1.6987999280293782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,float16,0,1.5166239738464355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,0,1,fp8,fp8,0,3.0237598419189453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,64,0,1,float16,fp8,0,3.4174133936564126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,128,1,float16,fp8,0,1.5310986836751301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,128,1,fp8,fp8,0,1.417535940806071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,float16,0,3.0486987431844077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,float16,0,1.520869255065918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,0,1,fp8,fp8,0,2.6988693873087564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,64,0,1,float16,fp8,0,3.061375935872396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,float16,0,3.0544745127360025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,128,1,float16,fp8,0,1.5362613995869954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,128,1,fp8,fp8,0,1.425322691599528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,float16,0,1.5299253463745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,0,1,float16,fp8,0,3.0737441380818686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,64,0,1,fp8,fp8,0,2.7052958806355796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,float16,0,3.074037233988444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,128,1,fp8,fp8,0,1.4344159762064617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,128,1,float16,fp8,0,1.5459200541178386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,float16,0,1.5410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,0,1,float16,fp8,0,3.087045351664225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,64,0,1,fp8,fp8,0,2.717514673868815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,float16,0,3.0918025970458984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,128,1,float16,fp8,0,1.5586506525675456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,128,1,fp8,fp8,0,1.452298641204834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,float16,0,0.899679978688558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,128,1,float16,fp8,0,0.9217813014984131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,0,1,float16,fp8,0,3.111445426940918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,float16,0,1.7286399205525715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,64,0,1,fp8,fp8,0,2.7360639572143555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,128,1,fp8,fp8,0,0.8782986799875895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,float16,0,0.7868053118387858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,0,1,float16,fp8,0,1.7506507237752278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,64,0,1,fp8,fp8,0,1.551813284556071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,128,1,float16,fp8,0,0.7954133351643881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,float16,0,1.561914602915446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,128,1,fp8,fp8,0,0.740933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,float16,0,0.7892800172170004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,0,1,float16,fp8,0,1.5701707204182942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,64,0,1,fp8,fp8,0,1.3902133305867512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,float16,0,1.5692747433980305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,128,1,float16,fp8,0,0.7977173328399658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,128,1,fp8,fp8,0,0.7437120278676351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,0,1,float16,fp8,0,1.5773812929789226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,float16,0,0.7933279673258463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,64,0,1,fp8,fp8,0,1.393642743428548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,float16,0,1.576906681060791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,128,1,fp8,fp8,0,0.7494239807128906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,128,1,float16,fp8,0,0.8027839660644531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,float16,0,0.8013066450754801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,0,1,float16,fp8,0,1.5847360293070476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,64,0,1,fp8,fp8,0,1.3996319770812988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,float16,0,1.5852959950764973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,128,1,float16,fp8,0,0.8109546502431234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,128,1,fp8,fp8,0,0.7578986485799154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,float16,0,0.4796586831410726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,float16,0,0.9030453364054362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,128,1,float16,fp8,0,0.49081599712371826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,0,1,fp8,fp8,0,1.4082400004069011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,64,0,1,float16,fp8,0,1.594223976135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,float16,0,0.4230399926503499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,128,1,fp8,fp8,0,0.4704586664835612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,0,1,float16,fp8,0,0.9144852956136068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,64,0,1,fp8,fp8,0,0.8152373631795248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,128,1,float16,fp8,0,0.4267093340555827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,128,1,fp8,fp8,0,0.40213334560394287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,float16,0,0.8216319878896078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,float16,0,0.42450666427612305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,0,1,float16,fp8,0,0.8250880241394043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,64,0,1,fp8,fp8,0,0.7358453273773193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,float16,0,0.8229386806488037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,128,1,float16,fp8,0,0.42745598157246906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,128,1,fp8,fp8,0,0.40435731410980225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,float16,0,0.4249066511789958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,0,1,float16,fp8,0,0.8285173575083414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,64,0,1,fp8,fp8,0,0.7378239631652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,float16,0,0.8261120319366455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,128,1,fp8,fp8,0,0.40637866655985516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,128,1,float16,fp8,0,0.42979733149210614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,float16,0,0.4293973445892334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,0,1,fp8,fp8,0,0.7398560047149658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,64,0,1,float16,fp8,0,0.8317440350850424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,float16,0,0.8307733535766602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,128,1,float16,fp8,0,0.43560532728830975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,128,1,fp8,fp8,0,0.4105493227640788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,float16,0,0.2720693349838257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,0,1,float16,fp8,0,0.8386186758677164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,64,0,1,fp8,fp8,0,0.7444480260213217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,float16,0,0.49243732293446857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,128,1,float16,fp8,0,0.279968003431956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,128,1,fp8,fp8,0,0.2704906662305196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,0,1,float16,fp8,0,0.5001173416773478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,64,0,1,fp8,fp8,0,0.45109331607818604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,float16,0,0.23987199862798056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,float16,0,0.44732268651326496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,128,1,float16,fp8,0,0.24172266324361166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,128,1,fp8,fp8,0,0.23417067527770996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,float16,0,0.23884799083073935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,0,1,fp8,fp8,0,0.40850667158762616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,64,0,1,float16,fp8,0,0.44972801208496094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,128,1,float16,fp8,0,0.2413546641667684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,128,1,fp8,fp8,0,0.23435733715693155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,float16,0,0.4474293390909831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,float16,0,0.24115200837453207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,0,1,float16,fp8,0,0.45002134641011554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,64,0,1,fp8,fp8,0,0.4068106810251872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,128,1,float16,fp8,0,0.24286399284998575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,128,1,fp8,fp8,0,0.23507734139760336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,float16,0,0.45176533857981366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,0,1,fp8,fp8,0,0.4103253285090129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,64,0,1,float16,fp8,0,0.4527786572774251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,float16,0,0.24502400557200113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,128,1,float16,fp8,0,0.24833067258199057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,128,1,fp8,fp8,0,0.23905066649119058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,float16,0,0.4562293291091919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,float16,0,0.19539199272791544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,0,1,float16,fp8,0,0.4579999844233195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,64,0,1,fp8,fp8,0,0.4131306807200114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,128,1,float16,fp8,0,0.19372800985972086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,float16,0,0.31432000796000165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,128,1,fp8,fp8,0,0.18569600582122803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,0,1,fp8,fp8,0,0.283733328183492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,64,0,1,float16,fp8,0,0.3143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,float16,0,0.18985066811243692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,128,1,float16,fp8,0,0.19008000691731772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,128,1,fp8,fp8,0,0.18365333477656046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,float16,0,0.3054719964663188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,float16,0,0.18969599405924478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,0,1,float16,fp8,0,0.3074079950650533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,64,0,1,fp8,fp8,0,0.2778986692428589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,128,1,float16,fp8,0,0.19010132551193237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,float16,0,0.30473599831263226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,128,1,fp8,fp8,0,0.18361065785090128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,0,1,float16,fp8,0,0.30646934111913043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,float16,0,0.1900213360786438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,64,0,1,fp8,fp8,0,0.2776533365249634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,float16,0,0.3067946632703145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,128,1,float16,fp8,0,0.1893813411394755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,128,1,fp8,fp8,0,0.18263467152913412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,0,1,float16,fp8,0,0.30431467294692993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,float16,0,0.18958399693171182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,64,0,1,fp8,fp8,0,0.27990933259328205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,128,1,float16,fp8,0,0.189520001411438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,float16,0,0.30566932757695514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,128,1,fp8,fp8,0,0.18436266978581747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,0,1,float16,fp8,0,0.30636266867319745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,64,0,1,fp8,fp8,0,0.27932800849278766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,float16,0,2.2370452880859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,128,1,fp8,fp8,0,2.0856800079345703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,128,1,float16,fp8,0,2.2591679890950522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,float16,0,3.8457492192586265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,float16,0,2.254437287648519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,0,1,fp8,fp8,0,3.4251413345336914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,64,0,1,float16,fp8,0,3.8719094594319663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,128,1,float16,fp8,0,2.2705066998799643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,float16,0,3.8595892588297525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,128,1,fp8,fp8,0,2.1130827267964682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,0,1,float16,fp8,0,3.886768023173014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,float16,0,2.2624319394429526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,64,0,1,fp8,fp8,0,3.4473225275675454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,float16,0,3.882960001627604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,128,1,fp8,fp8,0,2.1259892781575522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,128,1,float16,fp8,0,2.2853172620137534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,float16,0,2.28383461634318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,0,1,float16,fp8,0,3.9037386576334634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,64,0,1,fp8,fp8,0,3.465338706970215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,float16,0,3.909109433492025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,128,1,fp8,fp8,0,2.1509547233581543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,128,1,float16,fp8,0,2.304543972015381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,float16,0,1.3065600395202637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,0,1,float16,fp8,0,3.9321600596110025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,float16,0,2.1687466303507485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,64,0,1,fp8,fp8,0,3.496554692586263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,128,1,float16,fp8,0,1.3351680437723796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,128,1,fp8,fp8,0,1.2700533072153728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,float16,0,1.136954704920451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,0,1,float16,fp8,0,2.1974026362101235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,64,0,1,fp8,fp8,0,1.981866677602132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,128,1,float16,fp8,0,1.1496799786885579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,float16,0,1.9517760276794434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,128,1,fp8,fp8,0,1.0652426878611247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,float16,0,1.1444693406422932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,0,1,float16,fp8,0,1.9636160532633464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,64,0,1,fp8,fp8,0,1.740623950958252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,float16,0,1.9549706776936848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,128,1,float16,fp8,0,1.1551199754079182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,128,1,fp8,fp8,0,1.0738826592763264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,0,1,float16,fp8,0,1.96723206837972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,float16,0,1.1492533683776855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,64,0,1,fp8,fp8,0,1.74454927444458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,float16,0,1.9658719698588054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,128,1,float16,fp8,0,1.1598133246103923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,128,1,fp8,fp8,0,1.0790560245513916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,float16,0,1.1589813232421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,0,1,fp8,fp8,0,1.7534027099609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,64,0,1,float16,fp8,0,1.9783627192179363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,128,1,float16,fp8,0,1.171573321024577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,128,1,fp8,fp8,0,1.0916159947713215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,float16,0,1.9760266939798992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,float16,0,0.6772426764170328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,0,1,fp8,fp8,0,1.7705599466959636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,64,0,1,float16,fp8,0,1.9926986694335938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,float16,0,1.114736000696818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,128,1,float16,fp8,0,0.6956640084584554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,128,1,fp8,fp8,0,0.6623520056406657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,float16,0,0.5935146808624268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,0,1,float16,fp8,0,1.132805347442627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,64,0,1,fp8,fp8,0,1.0245760281880696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,float16,0,1.0052639643351238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,128,1,float16,fp8,0,0.5993760029474894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,128,1,fp8,fp8,0,0.5608586470286051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,float16,0,0.5952639977137247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,0,1,fp8,fp8,0,0.9031573136647543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,64,0,1,float16,fp8,0,1.0141386985778809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,float16,0,1.0093226432800293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,128,1,float16,fp8,0,0.6017813285191854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,128,1,fp8,fp8,0,0.5635999838511149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,0,1,float16,fp8,0,1.017301321029663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,float16,0,0.5989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,64,0,1,fp8,fp8,0,0.9057760238647461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,float16,0,1.0137920379638672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,128,1,float16,fp8,0,0.606117328008016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,128,1,fp8,fp8,0,0.5664159854253134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,0,1,float16,fp8,0,1.0195679664611816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,float16,0,0.6053066651026408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,64,0,1,fp8,fp8,0,0.9112213452657064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,float16,0,1.0217119852701824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,128,1,float16,fp8,0,0.6109013160069784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,128,1,fp8,fp8,0,0.5728319883346558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,float16,0,0.36378665765126544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,0,1,float16,fp8,0,1.0295466581980388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,64,0,1,fp8,fp8,0,0.9178826808929443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,float16,0,0.591269334157308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,128,1,float16,fp8,0,0.37406933307647705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,128,1,fp8,fp8,0,0.35812799135843915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,float16,0,0.31994666655858356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,0,1,float16,fp8,0,0.5999573469161987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,64,0,1,fp8,fp8,0,0.5447786649068197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,128,1,float16,fp8,0,0.32226133346557617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,float16,0,0.5345813433329264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,128,1,fp8,fp8,0,0.306005338827769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,0,1,float16,fp8,0,0.536298672358195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,float16,0,0.3205813368161519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,64,0,1,fp8,fp8,0,0.48309866587320965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,float16,0,0.5337866544723511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,128,1,float16,fp8,0,0.3229920069376628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,128,1,fp8,fp8,0,0.3083146611849467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,0,1,float16,fp8,0,0.5365279912948608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,64,0,1,fp8,fp8,0,0.4863893191019694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,float16,0,0.3227253357569377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,float16,0,0.5369066794713339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,128,1,float16,fp8,0,0.32660265763600665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,128,1,fp8,fp8,0,0.3104746739069621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,0,1,float16,fp8,0,0.5417439937591553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,float16,0,0.32703999678293866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,64,0,1,fp8,fp8,0,0.48875733216603595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,float16,0,0.5413973331451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,128,1,fp8,fp8,0,0.31379733482996625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,128,1,float16,fp8,0,0.3308746616045634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,0,1,float16,fp8,0,0.5458613236745199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,float16,0,0.20990933974583945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,64,0,1,fp8,fp8,0,0.49188268184661865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,float16,0,0.32969599962234497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,128,1,float16,fp8,0,0.21417067448298135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,128,1,fp8,fp8,0,0.2079733411471049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,0,1,float16,fp8,0,0.33482134342193604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,float16,0,0.1818986733754476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,64,0,1,fp8,fp8,0,0.30637333790461224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,float16,0,0.2950826684633891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,128,1,float16,fp8,0,0.1832586725552877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,128,1,fp8,fp8,0,0.17887999614079794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,0,1,float16,fp8,0,0.29755733410517377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,64,0,1,fp8,fp8,0,0.2731413245201111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,float16,0,0.18085867166519165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,float16,0,0.29578665892283124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,128,1,fp8,fp8,0,0.17899733781814575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,128,1,float16,fp8,0,0.18438933293024698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,0,1,float16,fp8,0,0.29757867256800336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,64,0,1,fp8,fp8,0,0.27350932359695435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,float16,0,0.18286399046579996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,float16,0,0.29810667037963867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,128,1,float16,fp8,0,0.185263991355896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,128,1,fp8,fp8,0,0.1807039976119995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,0,1,float16,fp8,0,0.2998773256937663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,float16,0,0.18532266219456991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,64,0,1,fp8,fp8,0,0.2762506604194641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,float16,0,0.2985600034395854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,128,1,float16,fp8,0,0.18916267156600952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,128,1,fp8,fp8,0,0.1833440065383911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,float16,0,0.15028267105420431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,0,1,float16,fp8,0,0.301199992497762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,64,0,1,fp8,fp8,0,0.27746133009592694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,128,1,float16,fp8,0,0.1500746707121531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,float16,0,0.21618666251500449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,128,1,fp8,fp8,0,0.14442666371663412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,float16,0,0.14830933014551798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,0,1,fp8,fp8,0,0.19934932390848795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,64,0,1,float16,fp8,0,0.21782932678858438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,128,1,float16,fp8,0,0.14826132853825888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,float16,0,0.21103467543919882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,128,1,fp8,fp8,0,0.14193600416183472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,0,1,float16,fp8,0,0.21182399988174438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,64,0,1,fp8,fp8,0,0.19374932845433554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,float16,0,0.14681067069371542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,128,1,float16,fp8,0,0.1472053329149882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,float16,0,0.2119040091832479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,128,1,fp8,fp8,0,0.14086932937304178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,0,1,float16,fp8,0,0.21023466189702353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,64,0,1,fp8,fp8,0,0.19342400630315146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,float16,0,0.14680000146230063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,128,1,float16,fp8,0,0.1469066639741262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,float16,0,0.21025067567825317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,128,1,fp8,fp8,0,0.14083733161290488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,0,1,float16,fp8,0,0.21005332469940186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,64,0,1,fp8,fp8,0,0.1938613255818685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,float16,0,0.1467573344707489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,128,1,float16,fp8,0,0.1481119990348816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,float16,0,0.2104426622390747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,128,1,fp8,fp8,0,0.14177067081133524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,0,1,float16,fp8,0,0.21029333273569742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,64,0,1,fp8,fp8,0,0.19527999560038248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,float16,0,2.9927520751953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,128,1,float16,fp8,0,3.0065972010294595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,float16,0,4.2323252360026045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,128,1,fp8,fp8,0,2.7851413091023765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,0,1,fp8,fp8,0,3.8016907374064126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,float16,0,3.0415252049764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,64,0,1,float16,fp8,0,4.256938616434733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,128,1,float16,fp8,0,3.0433600743611655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,float16,0,4.290495872497559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,128,1,fp8,fp8,0,2.805568059285482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,0,1,float16,fp8,0,4.288842519124349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,float16,0,3.0384693145751953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,64,0,1,fp8,fp8,0,3.824432055155436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,float16,0,4.281802813212077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,128,1,float16,fp8,0,3.057509422302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,128,1,fp8,fp8,0,2.827797253926595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,float16,0,3.095269203186035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,0,1,float16,fp8,0,4.307509422302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,64,0,1,fp8,fp8,0,3.842538515726725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,float16,0,4.354895909627278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,128,1,fp8,fp8,0,2.8581279118855796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,128,1,float16,fp8,0,3.0982879002889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,0,1,float16,fp8,0,4.3529707590738935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,float16,0,1.718549410502116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,float16,0,2.40338134765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,128,1,float16,fp8,0,1.7471733093261719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,64,0,1,fp8,fp8,0,3.8862508138020835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,128,1,fp8,fp8,0,1.6687199274698894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,0,1,float16,fp8,0,2.4333173433939614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,float16,0,1.489199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,64,0,1,fp8,fp8,0,2.2215360005696616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,float16,0,2.1151092847188315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,128,1,fp8,fp8,0,1.3891414006551106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,128,1,float16,fp8,0,1.5013813972473145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,0,1,fp8,fp8,0,1.9017705917358398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,64,0,1,float16,fp8,0,2.12394126256307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,float16,0,1.4939360618591309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,float16,0,2.121679941813151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,128,1,float16,fp8,0,1.5098719596862793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,128,1,fp8,fp8,0,1.396389325459798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,0,1,float16,fp8,0,2.137594699859619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,64,0,1,fp8,fp8,0,1.910645325978597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,float16,0,1.5026826858520508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,float16,0,2.129765351613363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,128,1,float16,fp8,0,1.5200053850809734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,128,1,fp8,fp8,0,1.4103360176086426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,0,1,float16,fp8,0,2.147205352783203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,64,0,1,fp8,fp8,0,1.9210933049519856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,float16,0,1.5170613924662273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,float16,0,2.1499573389689126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,128,1,float16,fp8,0,1.5331786473592122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,128,1,fp8,fp8,0,1.42412265141805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,float16,0,0.8767306804656982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,0,1,float16,fp8,0,2.1671093304951987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,float16,0,1.2217439810434978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,64,0,1,fp8,fp8,0,1.9396106402079265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,128,1,float16,fp8,0,0.8945759932200114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,128,1,fp8,fp8,0,0.8516906897226969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,0,1,fp8,fp8,0,1.1331146558125813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,64,0,1,float16,fp8,0,1.241498629252116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,float16,0,0.7614879608154297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,float16,0,1.0778453350067139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,128,1,fp8,fp8,0,0.7144800027211508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,128,1,float16,fp8,0,0.7687520186106364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,0,1,float16,fp8,0,1.0867359638214111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,float16,0,0.7648053169250488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,64,0,1,fp8,fp8,0,0.9758079846700033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,float16,0,1.0828746954600017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,128,1,float16,fp8,0,0.7726879914601644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,128,1,fp8,fp8,0,0.7194773356119791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,0,1,float16,fp8,0,1.092959960301717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,64,0,1,fp8,fp8,0,0.978111982345581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,float16,0,0.7686453660329183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,128,1,float16,fp8,0,0.7782453695933024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,float16,0,1.090394655863444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,128,1,fp8,fp8,0,0.7254933516184489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,0,1,float16,fp8,0,1.0973172982533772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,float16,0,0.7767306963602701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,64,0,1,fp8,fp8,0,0.9863999684651693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,128,1,fp8,fp8,0,0.7331093152364095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,128,1,float16,fp8,0,0.7850986321767172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,float16,0,1.0982826550801594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,float16,0,0.4572853247324626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,0,1,fp8,fp8,0,0.9927146434783936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,64,0,1,float16,fp8,0,1.108581304550171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,float16,0,0.6350666681925455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,128,1,float16,fp8,0,0.4697920083999634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,128,1,fp8,fp8,0,0.4480106830596924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,0,1,fp8,fp8,0,0.5927840073903402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,64,0,1,float16,fp8,0,0.6464373270670573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,float16,0,0.39897600809733075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,128,1,fp8,fp8,0,0.37963199615478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,128,1,float16,fp8,0,0.4023520151774089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,float16,0,0.5632373491923014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,0,1,float16,fp8,0,0.5670773188273112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,float16,0,0.4008479913075765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,64,0,1,fp8,fp8,0,0.5141439835230509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,128,1,float16,fp8,0,0.4049599965413411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,float16,0,0.5653440157572428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,128,1,fp8,fp8,0,0.3813333511352539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,0,1,float16,fp8,0,0.5683840115865072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,64,0,1,fp8,fp8,0,0.515450676282247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,float16,0,0.4031840165456136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,float16,0,0.5674346685409546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,128,1,float16,fp8,0,0.4076373179753621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,128,1,fp8,fp8,0,0.3840906620025635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,0,1,float16,fp8,0,0.5711679855982462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,float16,0,0.4064106543858846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,64,0,1,fp8,fp8,0,0.5169546604156494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,float16,0,0.5713173151016235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,128,1,float16,fp8,0,0.41285868485768634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,128,1,fp8,fp8,0,0.3880639870961507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,0,1,float16,fp8,0,0.5760533412297567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,float16,0,0.24897066752115884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,float16,0,0.3418826659520467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,64,0,1,fp8,fp8,0,0.5223519802093506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,128,1,float16,fp8,0,0.25486934185028076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,128,1,fp8,fp8,0,0.2455199956893921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,0,1,float16,fp8,0,0.34865065415700275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,float16,0,0.21391467253367105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,64,0,1,fp8,fp8,0,0.32103466987609863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,float16,0,0.30059732993443805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,128,1,float16,fp8,0,0.21685334046681723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,128,1,fp8,fp8,0,0.20998932917912802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,0,1,float16,fp8,0,0.30296534299850464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,64,0,1,fp8,fp8,0,0.27981332937876385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,float16,0,0.21406932671864828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,float16,0,0.3009546597798665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,128,1,float16,fp8,0,0.21809067328770956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,128,1,fp8,fp8,0,0.2099306583404541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,0,1,float16,fp8,0,0.3021013339360555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,64,0,1,fp8,fp8,0,0.2800160050392151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,float16,0,0.21719467639923096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,float16,0,0.3023413419723511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,128,1,float16,fp8,0,0.21842666467030844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,128,1,fp8,fp8,0,0.21189866463343301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,0,1,float16,fp8,0,0.30485333998998004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,64,0,1,fp8,fp8,0,0.2821226716041565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,float16,0,0.2193653384844462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,float16,0,0.30769066015879315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,128,1,fp8,fp8,0,0.21549334128697714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,128,1,float16,fp8,0,0.2225760022799174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,0,1,float16,fp8,0,0.30843732754389447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,float16,0,0.14380266269048056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,64,0,1,fp8,fp8,0,0.28571200370788574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,128,1,float16,fp8,0,0.14662399888038635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,float16,0,0.19431465864181519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,128,1,fp8,fp8,0,0.14448533455530801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,0,1,float16,fp8,0,0.1979680061340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,64,0,1,fp8,fp8,0,0.1851039926211039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,float16,0,0.1242026686668396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,float16,0,0.1729546586672465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,128,1,float16,fp8,0,0.123690664768219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,128,1,fp8,fp8,0,0.11921067039171855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,0,1,float16,fp8,0,0.17335466543833414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,64,0,1,fp8,fp8,0,0.15752533078193665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,float16,0,0.12358933687210083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,float16,0,0.17121066649754843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,128,1,float16,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,128,1,fp8,fp8,0,0.11823466420173645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,0,1,float16,fp8,0,0.17287999391555786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,64,0,1,fp8,fp8,0,0.15780799587567648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,float16,0,0.12403733531634013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,float16,0,0.17287466923395792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,128,1,float16,fp8,0,0.12569600343704224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,128,1,fp8,fp8,0,0.11979732910792033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,0,1,fp8,fp8,0,0.15894933541615805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,float16,0,0.12588799993197122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,64,0,1,float16,fp8,0,0.1734079917271932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,float16,0,0.17350933949152628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,128,1,float16,fp8,0,0.1258133351802826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,128,1,fp8,fp8,0,0.1229759951432546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,0,1,float16,fp8,0,0.17468800147374472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,64,0,1,fp8,fp8,0,0.16058666507403055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,float16,0,0.10569600264231364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,float16,0,0.13205333550771078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,128,1,float16,fp8,0,0.10346666971842448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,0,1,float16,fp8,0,0.13191999991734824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,128,1,fp8,fp8,0,0.10171733299891154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,64,0,1,fp8,fp8,0,0.12362666924794515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,float16,0,0.10338667035102844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,float16,0,0.1309386690457662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,128,1,float16,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,128,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,0,1,float16,fp8,0,0.13015466928482056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,64,0,1,fp8,fp8,0,0.12178132931391399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,float16,0,0.10355200370152791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,128,1,float16,fp8,0,0.1034879982471466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,float16,0,0.13038399815559387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,128,1,fp8,fp8,0,0.10081066687901814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,0,1,float16,fp8,0,0.1300320029258728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,float16,0,0.10346666971842448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,64,0,1,fp8,fp8,0,0.12215466300646464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,float16,0,0.1312373379866282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,128,1,float16,fp8,0,0.1030453344186147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,128,1,fp8,fp8,0,0.10016533732414246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,0,1,float16,fp8,0,0.13143466909726462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,64,0,1,fp8,fp8,0,0.12179199854532878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,float16,0,0.10401599605878194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,float16,0,0.13115732868512472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,128,1,float16,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,128,1,fp8,fp8,0,0.09959466258684795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,0,1,float16,fp8,0,0.13214932878812155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,64,0,1,fp8,fp8,0,0.12151466806729634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,float16,0,2.2227306365966797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,128,1,float16,fp8,0,2.2327733039855957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,float16,0,2.8221813837687173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,128,1,fp8,fp8,0,2.0679359436035156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,0,1,float16,fp8,0,2.8382720947265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,64,0,1,fp8,fp8,0,2.5497867266337075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,float16,0,2.251034736633301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,float16,0,2.852698644002279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,128,1,float16,fp8,0,2.257808049519857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,128,1,fp8,fp8,0,2.0859626134236655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,0,1,float16,fp8,0,2.8587681452433267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,float16,0,2.270986715952555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,64,0,1,fp8,fp8,0,2.570634682973226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,float16,0,2.877706527709961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,128,1,float16,fp8,0,2.2773653666178384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,128,1,fp8,fp8,0,2.102389335632324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,0,1,float16,fp8,0,2.8862291971842446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,64,0,1,fp8,fp8,0,2.5879467328389487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,float16,0,2.293269316355387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,float16,0,2.899338722229004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,128,1,fp8,fp8,0,2.1229066848754883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,128,1,float16,fp8,0,2.293445269266764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,float16,0,1.2863840262095134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,0,1,float16,fp8,0,2.8999147415161133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,float16,0,1.6318613688151042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,64,0,1,fp8,fp8,0,2.6130773226420083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,128,1,float16,fp8,0,1.3130666414896648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,128,1,fp8,fp8,0,1.2500053246815999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,0,1,float16,fp8,0,1.65447998046875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,64,0,1,fp8,fp8,0,1.5235466957092285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,float16,0,1.1148959795633953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,float16,0,1.416752020517985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,128,1,float16,fp8,0,1.124895970026652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,128,1,fp8,fp8,0,1.0418346722920735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,0,1,float16,fp8,0,1.4294346173604329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,64,0,1,fp8,fp8,0,1.2862133185068767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,float16,0,1.122383991877238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,float16,0,1.4242080052693684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,128,1,float16,fp8,0,1.1340586344401042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,128,1,fp8,fp8,0,1.0515999794006348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,0,1,float16,fp8,0,1.4371412595113118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,float16,0,1.1289066473642986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,64,0,1,fp8,fp8,0,1.2933173179626465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,float16,0,1.4326400756835938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,128,1,float16,fp8,0,1.140986680984497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,128,1,fp8,fp8,0,1.0570240020751953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,0,1,float16,fp8,0,1.4446825981140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,64,0,1,fp8,fp8,0,1.3023893038431804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,float16,0,1.1394506295522053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,float16,0,1.4458826382954915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,128,1,float16,fp8,0,1.1510826746622722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,128,1,fp8,fp8,0,1.071354627609253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,float16,0,0.6607893308003744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,0,1,float16,fp8,0,1.4594720204671223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,float16,0,0.8358240127563477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,64,0,1,fp8,fp8,0,1.3164959748586018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,128,1,float16,fp8,0,0.6763359705607096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,128,1,fp8,fp8,0,0.6425226529439291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,0,1,float16,fp8,0,0.8483946323394775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,64,0,1,fp8,fp8,0,0.7819413344065348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,float16,0,0.5737280050913492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,float16,0,0.7274719874064127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,128,1,fp8,fp8,0,0.5396906534830729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,128,1,float16,fp8,0,0.5783733526865641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,0,1,float16,fp8,0,0.7332746982574463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,float16,0,0.5771679878234863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,64,0,1,fp8,fp8,0,0.6659946839014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,float16,0,0.7316426436106364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,128,1,float16,fp8,0,0.5822826623916626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,128,1,fp8,fp8,0,0.5437653462092081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,0,1,float16,fp8,0,0.7383039792378744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,64,0,1,fp8,fp8,0,0.6674400170644125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,float16,0,0.5809493462244669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,float16,0,0.7362666924794515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,128,1,float16,fp8,0,0.5871893167495728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,128,1,fp8,fp8,0,0.5478560129801432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,float16,0,0.5851200024286906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,0,1,float16,fp8,0,0.7413280010223389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,64,0,1,fp8,fp8,0,0.6739786465962728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,128,1,fp8,fp8,0,0.5544266700744629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,128,1,float16,fp8,0,0.5920373201370239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,float16,0,0.7421653270721436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,float16,0,0.3476693232854207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,0,1,fp8,fp8,0,0.6789546807607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,64,0,1,float16,fp8,0,0.7484479745229086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,float16,0,0.43778133392333984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,128,1,float16,fp8,0,0.35525866349538165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,128,1,fp8,fp8,0,0.3389013210932414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,float16,0,0.29942933718363446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,0,1,fp8,fp8,0,0.41262932618459064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,64,0,1,float16,fp8,0,0.4456640084584554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,float16,0,0.3798346519470215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,128,1,float16,fp8,0,0.3040320078531901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,128,1,fp8,fp8,0,0.28913066784540814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,0,1,float16,fp8,0,0.38357333342234295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,float16,0,0.3020106752713521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,64,0,1,fp8,fp8,0,0.3531786600748698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,float16,0,0.3802453279495239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,128,1,float16,fp8,0,0.304751992225647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,128,1,fp8,fp8,0,0.2900000015894572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,0,1,float16,fp8,0,0.38488535086313885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,64,0,1,fp8,fp8,0,0.35577599207560223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,float16,0,0.30347200234731037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,float16,0,0.384549339612325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,128,1,float16,fp8,0,0.3086026708285014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,128,1,fp8,fp8,0,0.2922719915707906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,0,1,float16,fp8,0,0.3872906764348348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,float16,0,0.308351993560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,64,0,1,fp8,fp8,0,0.359007994333903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,float16,0,0.3884640137354533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,128,1,float16,fp8,0,0.311408003171285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,128,1,fp8,fp8,0,0.29553600152333576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,0,1,float16,fp8,0,0.3941919803619385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,float16,0,0.18928533792495728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,float16,0,0.23767467339833578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,64,0,1,fp8,fp8,0,0.3612693150838216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,128,1,float16,fp8,0,0.19555733601252237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,128,1,fp8,fp8,0,0.18943466742833456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,0,1,float16,fp8,0,0.24242132902145386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,float16,0,0.1613653302192688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,64,0,1,fp8,fp8,0,0.22774400313695273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,float16,0,0.2048693299293518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,128,1,float16,fp8,0,0.16486400365829468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,128,1,fp8,fp8,0,0.15913599729537964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,0,1,float16,fp8,0,0.2065546711285909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,float16,0,0.16268799702326456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,64,0,1,fp8,fp8,0,0.1939786672592163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,float16,0,0.20546666781107584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,128,1,float16,fp8,0,0.16506133476893106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,128,1,fp8,fp8,0,0.15939733386039734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,0,1,float16,fp8,0,0.20683733622233072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,64,0,1,fp8,fp8,0,0.19604800144831339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,float16,0,0.16330666343371072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,float16,0,0.2053813338279724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,128,1,float16,fp8,0,0.16523200273513794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,128,1,fp8,fp8,0,0.16275733709335327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,float16,0,0.16636266311009726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,0,1,fp8,fp8,0,0.19590399662653604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,64,0,1,float16,fp8,0,0.20803733666737875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,float16,0,0.20830400784810385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,128,1,float16,fp8,0,0.16761600971221924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,128,1,fp8,fp8,0,0.1646719972292582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,0,1,float16,fp8,0,0.21185600757598877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,64,0,1,fp8,fp8,0,0.19978133837381998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,float16,0,0.11146666606267293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,float16,0,0.13697600364685059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,128,1,fp8,fp8,0,0.1120693286259969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,128,1,float16,fp8,0,0.11381333072980244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,0,1,float16,fp8,0,0.14033066232999167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,64,0,1,fp8,fp8,0,0.13218667109807333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,float16,0,0.09738133351008098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,float16,0,0.12104533116022746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,128,1,float16,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,128,1,fp8,fp8,0,0.09298666318257649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,0,1,float16,fp8,0,0.12261866529782613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,float16,0,0.0983733336130778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,64,0,1,fp8,fp8,0,0.11205333471298218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,float16,0,0.1213759978612264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,128,1,float16,fp8,0,0.09896000226338704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,128,1,fp8,fp8,0,0.09289600451787312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,0,1,fp8,fp8,0,0.11160533626874287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,64,0,1,float16,fp8,0,0.12211199601491292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,float16,0,0.09707732995351155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,float16,0,0.12170132994651794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,128,1,float16,fp8,0,0.09920533498128255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,0,1,float16,fp8,0,0.12205333511034648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,128,1,fp8,fp8,0,0.09317866961161296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,64,0,1,fp8,fp8,0,0.11359467109044392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,float16,0,0.09899733463923137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,float16,0,0.12260799606641133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,128,1,float16,fp8,0,0.09988266229629517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,128,1,fp8,fp8,0,0.09492266178131104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,0,1,float16,fp8,0,0.12463999787966411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,64,0,1,fp8,fp8,0,0.11386666695276897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,float16,0,0.08135466774304707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,float16,0,0.09762133161226909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,128,1,float16,fp8,0,0.08076799909273784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,128,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,float16,0,0.08091733356316884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,0,1,fp8,fp8,0,0.09122666716575623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,64,0,1,float16,fp8,0,0.09756799538930257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,float16,0,0.0971999963124593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,128,1,float16,fp8,0,0.08099199831485748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,128,1,fp8,fp8,0,0.07719466586907704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,0,1,float16,fp8,0,0.09742400050163269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,64,0,1,fp8,fp8,0,0.09117333094278972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,float16,0,0.08090666433175404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,128,1,float16,fp8,0,0.08086400230725606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,float16,0,0.09726400176684062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,128,1,fp8,fp8,0,0.07691200077533722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,0,1,float16,fp8,0,0.09710400303204854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,64,0,1,fp8,fp8,0,0.09097066521644592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,float16,0,0.08123733103275299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,128,1,float16,fp8,0,0.08078399797280629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,float16,0,0.09703466296195984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,128,1,fp8,fp8,0,0.079434668024381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,0,1,float16,fp8,0,0.09708266456921895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,64,0,1,fp8,fp8,0,0.09092266360918681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,float16,0,0.09725333253542583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,128,1,float16,float16,0,0.08121599753697713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,128,1,fp8,fp8,0,0.07880533238252004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,0,1,float16,fp8,0,0.09735999504725139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,64,0,1,fp8,fp8,0,0.09009599685668945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,float16,0,2.651477336883545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,128,1,float16,fp8,0,2.6433547337849936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,float16,0,3.028421401977539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,128,1,fp8,fp8,0,2.631178696950277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,0,1,float16,fp8,0,3.0204480489095054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,64,0,1,fp8,fp8,0,2.9000587463378906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,float16,0,2.673685391743978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,float16,0,3.0411945978800454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,128,1,float16,fp8,0,2.6674667994181314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,128,1,fp8,fp8,0,2.6950559616088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,0,1,float16,fp8,0,3.0344692866007485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,64,0,1,fp8,fp8,0,2.989311854044596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,float16,0,2.756944020589193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,float16,0,3.1177279154459634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,128,1,float16,fp8,0,2.702357292175293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,128,1,fp8,fp8,0,2.718384106953939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,0,1,float16,fp8,0,3.083834648132324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,64,0,1,fp8,fp8,0,3.0317920049031577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,float16,0,2.8171892166137695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,float16,0,3.2239627838134766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,128,1,float16,fp8,0,2.7918240229288735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,128,1,fp8,fp8,0,2.8930187225341797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,0,1,float16,fp8,0,3.1770025889078775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,float16,0,1.4542400042215984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,float16,0,1.6784426371256511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,128,1,float16,fp8,0,1.4513920148213704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,64,0,1,fp8,fp8,0,3.186725298563639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,128,1,fp8,fp8,0,1.4944052696228027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,0,1,float16,fp8,0,1.677146593729655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,64,0,1,fp8,fp8,0,1.66428804397583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,float16,0,1.3354399998982747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,float16,0,1.5223040580749512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,128,1,float16,fp8,0,1.3317493597666423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,128,1,fp8,fp8,0,1.2951786518096924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,0,1,float16,fp8,0,1.5173759460449219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,64,0,1,fp8,fp8,0,1.4370773633321126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,float16,0,1.3467626571655273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,float16,0,1.532698631286621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,128,1,float16,fp8,0,1.341584046681722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,128,1,fp8,fp8,0,1.3228960037231445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,0,1,float16,fp8,0,1.530176003774007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,64,0,1,fp8,fp8,0,1.4640533129374187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,float16,0,1.3455840746561687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,float16,0,1.5390559832255046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,128,1,float16,fp8,0,1.3486453692118328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,128,1,fp8,fp8,0,1.3360053698221843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,0,1,float16,fp8,0,1.5315146446228027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,64,0,1,fp8,fp8,0,1.476784070332845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,float16,0,1.3648746808369954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,float16,0,1.5542133649190266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,128,1,float16,fp8,0,1.3646133740743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,128,1,fp8,fp8,0,1.4470453262329102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,0,1,float16,fp8,0,1.5505280494689941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,float16,0,0.7254133224487305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,64,0,1,fp8,fp8,0,1.5870399475097656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,128,1,float16,fp8,0,0.7116853396097819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,float16,0,0.8396853605906168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,128,1,fp8,fp8,0,0.7527573108673096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,0,1,float16,fp8,0,0.8279786904652914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,float16,0,0.6785653432210287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,64,0,1,fp8,fp8,0,0.8366453647613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,float16,0,0.770581324895223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,128,1,float16,fp8,0,0.6762133439381918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,128,1,fp8,fp8,0,0.6434133450190226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,0,1,float16,fp8,0,0.7686826388041178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,64,0,1,fp8,fp8,0,0.7172640164693197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,float16,0,0.6817813714345297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,float16,0,0.7766133149464926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,128,1,float16,fp8,0,0.6812106768290201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,128,1,fp8,fp8,0,0.6574613253275553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,0,1,fp8,fp8,0,0.7336479822794596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,64,0,1,float16,fp8,0,0.7731359799702963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,float16,0,0.681109348932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,128,1,float16,fp8,0,0.6823306878407797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,float16,0,0.7782506942749023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,128,1,fp8,fp8,0,0.6554400126139323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,0,1,float16,fp8,0,0.7773706912994385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,64,0,1,fp8,fp8,0,0.7278133233388265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,float16,0,0.6893066565195719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,128,1,float16,fp8,0,0.6871413389841715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,float16,0,0.7898560365041097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,128,1,fp8,fp8,0,0.7132053375244141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,float16,0,0.3723893165588379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,0,1,float16,fp8,0,0.7874506314595541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,64,0,1,fp8,fp8,0,0.7888533274332682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,float16,0,0.430181344350179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,128,1,float16,fp8,0,0.3653973340988159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,128,1,fp8,fp8,0,0.38510934511820477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,0,1,float16,fp8,0,0.4246133168538411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,64,0,1,fp8,fp8,0,0.43175466855367023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,float16,0,0.3468266725540161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,128,1,fp8,fp8,0,0.3307360013326009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,128,1,float16,fp8,0,0.3468853235244751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,float16,0,0.3966826597849528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,0,1,float16,fp8,0,0.39660799503326416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,float16,0,0.3493280013402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,64,0,1,fp8,fp8,0,0.36753066380818683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,128,1,float16,fp8,0,0.3473706642786662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,float16,0,0.3996800184249878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,128,1,fp8,fp8,0,0.3388906717300415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,0,1,float16,fp8,0,0.39692266782124835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,64,0,1,fp8,fp8,0,0.3741066853205363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,float16,0,0.35153599580128986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,float16,0,0.400490681330363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,128,1,float16,fp8,0,0.34992531935373944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,128,1,fp8,fp8,0,0.335749348004659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,0,1,float16,fp8,0,0.40117335319519043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,float16,0,0.3538506825764974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,64,0,1,fp8,fp8,0,0.3732373317082723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,float16,0,0.4050613244374593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,128,1,fp8,fp8,0,0.3471573193868001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,128,1,float16,fp8,0,0.3537600040435791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,0,1,float16,fp8,0,0.4028000036875407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,64,0,1,fp8,fp8,0,0.3856480121612549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,float16,0,0.19831466674804688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,128,1,float16,fp8,0,0.19463467597961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,float16,0,0.2307466665903727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,128,1,fp8,fp8,0,0.20259199539820352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,0,1,float16,fp8,0,0.22543466091156006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,64,0,1,fp8,fp8,0,0.22668800751368204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,float16,0,0.20930665731430054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,float16,0,0.18300267060597739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,128,1,float16,fp8,0,0.1827733318010966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,128,1,fp8,fp8,0,0.17564266920089722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,0,1,float16,fp8,0,0.2083466649055481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,64,0,1,fp8,fp8,0,0.19552532831827799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,float16,0,0.18290134270985922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,float16,0,0.2091360092163086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,128,1,float16,fp8,0,0.18348799149195352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,128,1,fp8,fp8,0,0.17746132612228394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,0,1,float16,fp8,0,0.20920000473658243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,64,0,1,fp8,fp8,0,0.19795199235280356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,float16,0,0.18318400780359903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,128,1,float16,fp8,0,0.18410666783650717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,float16,0,0.21093332767486572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,128,1,fp8,fp8,0,0.17826133966445923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,0,1,float16,fp8,0,0.20920532941818237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,64,0,1,fp8,fp8,0,0.19693867365519205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,float16,0,0.2118720014890035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,float16,0,0.18771199385325113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,128,1,float16,fp8,0,0.18409067392349243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,128,1,fp8,fp8,0,0.1798186699549357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,float16,0,0.10776533683141072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,0,1,float16,fp8,0,0.21118932962417603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,64,0,1,fp8,fp8,0,0.20154666900634766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,float16,0,0.12628799676895142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,128,1,float16,fp8,0,0.10644800464312236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,128,1,fp8,fp8,0,0.1120853324731191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,0,1,float16,fp8,0,0.12488533059755962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,64,0,1,fp8,fp8,0,0.12486400206883748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,float16,0,0.0974133312702179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,float16,0,0.11335999766985576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,128,1,float16,fp8,0,0.09799999992052714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,128,1,fp8,fp8,0,0.09333866834640503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,0,1,float16,fp8,0,0.11183999975522359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,64,0,1,fp8,fp8,0,0.10458667079607646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,fp8,0,0.09738666812578838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,float16,0,0.11363733808199565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,128,1,float16,float16,0,0.09834667046864827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,128,1,fp8,fp8,0,0.09342933694521587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,0,1,float16,fp8,0,0.11339199542999268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,64,0,1,fp8,fp8,0,0.10526399811108907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,float16,0,0.09858133395512898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,128,1,float16,fp8,0,0.09729066491127014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,float16,0,0.11333866914113362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,128,1,fp8,fp8,0,0.09567999839782715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,0,1,float16,fp8,0,0.11331199606259663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,64,0,1,fp8,fp8,0,0.10658666491508484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,float16,0,0.09942932923634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,128,1,float16,fp8,0,0.0993386705716451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,float16,0,0.11389333009719849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,128,1,fp8,fp8,0,0.09628267089525859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,0,1,float16,fp8,0,0.11514133214950562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,64,0,1,fp8,fp8,0,0.10817066828409831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,float16,0,0.0717439999183019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,fp8,0,0.061610668897628784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,128,1,float16,float16,0,0.06253866851329803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,0,1,float16,fp8,0,0.07156800230344136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,128,1,fp8,fp8,0,0.06576533118883769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,64,0,1,fp8,fp8,0,0.07293333113193512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,float16,0,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,float16,0,0.067930668592453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,128,1,float16,fp8,0,0.05949866771697998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,128,1,fp8,fp8,0,0.057301332553227745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,0,1,float16,fp8,0,0.06836799780527751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,64,0,1,fp8,fp8,0,0.06484266618887584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,float16,0,0.05860800047715505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,float16,0,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,128,1,fp8,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,0,1,float16,fp8,0,0.06842666864395142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,128,1,float16,fp8,0,0.059205333391825356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,64,0,1,fp8,fp8,0,0.06471466521422069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,float16,0,0.059808000922203064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,float16,0,0.06869333485762279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,128,1,float16,fp8,0,0.05900266766548157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,128,1,fp8,fp8,0,0.0572213331858317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,0,1,float16,fp8,0,0.06855466465155284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,64,0,1,fp8,fp8,0,0.06401066482067108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,float16,0,0.05865600208441416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,float16,0,0.06761600077152252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,128,1,float16,fp8,0,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,128,1,fp8,fp8,0,0.05849599838256836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,0,1,fp8,fp8,0,0.06489066779613495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,float16,0,0.03910933434963226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,64,0,1,float16,fp8,0,0.06796266635258992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,float16,0,0.04560000201066335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,128,1,fp8,fp8,0,0.03847466657559077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,0,1,float16,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,64,0,1,fp8,fp8,0,0.043493335445721946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,float16,0,0.03801066676775614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,float16,0,0.04460266729195913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,128,1,float16,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,128,1,fp8,fp8,0,0.03696000079313914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,0,1,float16,fp8,0,0.04488533238569895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,64,0,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,float16,0,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,128,1,float16,float16,0,0.03658666710058848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,0,1,float16,fp8,0,0.0444160004456838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,64,0,1,fp8,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,float16,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,float16,0,0.044549331068992615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,128,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,128,1,fp8,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,0,1,float16,fp8,0,0.044954667488733925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,float16,0,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,float16,0,0.043824002146720886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,64,0,1,fp8,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,128,1,float16,fp8,0,0.03741333385308584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,128,1,fp8,fp8,0,0.036101333796978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,0,1,float16,fp8,0,0.04462933540344238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,64,0,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,128,1,float16,float16,0,2.5785600344340005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,0,1,float16,float16,0,2.579338709513346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,128,1,float16,fp8,0,2.5752533276875815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,128,1,fp8,fp8,0,2.5460853576660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,0,1,float16,fp8,0,2.558997313181559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,64,0,1,fp8,fp8,0,2.5176639556884766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,128,1,float16,float16,0,2.600266615549723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,0,1,float16,float16,0,2.592181364695231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,128,1,float16,fp8,0,2.5880212783813477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,128,1,fp8,fp8,0,2.61954132715861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,0,1,float16,fp8,0,2.5832212766011557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,64,0,1,fp8,fp8,0,2.5923946698506675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,128,1,float16,float16,0,2.6901493072509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,128,1,float16,fp8,0,2.640607992808024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,0,1,float16,float16,0,2.6707518895467124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,128,1,fp8,fp8,0,2.658565362294515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,0,1,float16,fp8,0,2.6837546030680337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,64,0,1,fp8,fp8,0,2.6441920598347983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,128,1,float16,float16,0,2.7301813761393228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,0,1,float16,float16,0,2.7206878662109375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,128,1,float16,fp8,0,2.7302773793538413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,128,1,fp8,fp8,0,2.8151680628458657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,0,1,float16,fp8,0,2.712736129760742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,64,0,1,fp8,fp8,0,2.7924585342407227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,128,1,float16,float16,0,1.4136853218078613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,0,1,float16,float16,0,1.4307467142740886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,128,1,float16,fp8,0,1.4021973609924316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,128,1,fp8,fp8,0,1.442090670267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,0,1,float16,fp8,0,1.4399147033691406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,64,0,1,fp8,fp8,0,1.4545812606811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,128,1,float16,float16,0,1.29912535349528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,0,1,float16,float16,0,1.2948533693949382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,128,1,float16,fp8,0,1.295792023340861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,128,1,fp8,fp8,0,1.2578667004903157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,0,1,float16,fp8,0,1.2913973331451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,64,0,1,fp8,fp8,0,1.2404106458028157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,128,1,float16,float16,0,1.3061119715372722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,0,1,float16,float16,0,1.3092052936553955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,128,1,float16,fp8,0,1.3053119977315266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,128,1,fp8,fp8,0,1.2914506594340007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,0,1,float16,fp8,0,1.3047839800516765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,64,0,1,fp8,fp8,0,1.2709866364796956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,128,1,float16,float16,0,1.3138720194498699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,128,1,float16,fp8,0,1.3080480098724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,0,1,float16,float16,0,1.3074133396148682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,128,1,fp8,fp8,0,1.3025120099385579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,0,1,float16,fp8,0,1.3138186931610107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,64,0,1,fp8,fp8,0,1.2851520379384358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,128,1,float16,float16,0,1.3236107031504314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,0,1,float16,float16,0,1.3288319905598958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,128,1,float16,fp8,0,1.319258689880371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,128,1,fp8,fp8,0,1.4084266026814778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,0,1,float16,fp8,0,1.315018653869629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,64,0,1,fp8,fp8,0,1.3994720776875813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,128,1,float16,float16,0,0.7062826951344808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,0,1,float16,float16,0,0.7133653163909912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,128,1,float16,fp8,0,0.6973866621653239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,0,1,float16,fp8,0,0.7073973019917806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,128,1,fp8,fp8,0,0.7228000164031982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,128,1,float16,float16,0,0.6608853340148926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,64,0,1,fp8,fp8,0,0.7284266948699951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,0,1,float16,float16,0,0.661349336306254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,128,1,float16,fp8,0,0.6566880146662394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,128,1,fp8,fp8,0,0.6260693470637003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,0,1,float16,fp8,0,0.6565119822820028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,64,0,1,fp8,fp8,0,0.6205333471298218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,128,1,float16,float16,0,0.6657386620839437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,0,1,float16,float16,0,0.6627786556879679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,128,1,float16,fp8,0,0.6611040035883585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,128,1,fp8,fp8,0,0.6403359969456991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,0,1,fp8,fp8,0,0.6310933430989584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,64,0,1,float16,fp8,0,0.6622453530629476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,128,1,float16,float16,0,0.6650613149007162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,0,1,float16,float16,0,0.6662240028381348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,128,1,float16,fp8,0,0.6639466683069865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,128,1,fp8,fp8,0,0.6398186683654785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,0,1,float16,fp8,0,0.6648746728897095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,64,0,1,fp8,fp8,0,0.629477341969808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,128,1,float16,float16,0,0.6687946319580078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,0,1,float16,float16,0,0.6723039944966634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,128,1,float16,fp8,0,0.667738676071167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,128,1,float16,float16,0,0.3615573247273763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,128,1,fp8,fp8,0,0.6967626412709554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,0,1,float16,fp8,0,0.6697813669840494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,64,0,1,fp8,fp8,0,0.6899573008219401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,0,1,float16,float16,0,0.36637866497039795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,128,1,float16,fp8,0,0.35762667655944824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,128,1,fp8,fp8,0,0.3758346637090047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,0,1,float16,fp8,0,0.3623146613438924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,64,0,1,fp8,fp8,0,0.37275199095408124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,128,1,float16,float16,0,0.3405119975407918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,0,1,float16,float16,0,0.33914132912953693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,128,1,float16,fp8,0,0.33693333466847736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,128,1,fp8,fp8,0,0.3221013347307841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,0,1,float16,fp8,0,0.3375786542892456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,64,0,1,fp8,fp8,0,0.31917333602905273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,128,1,float16,float16,0,0.33979201316833496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,0,1,float16,float16,0,0.33990931510925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,128,1,float16,fp8,0,0.3382026751836141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,128,1,fp8,fp8,0,0.32876267035802204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,0,1,fp8,fp8,0,0.3266773422559102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,64,0,1,float16,fp8,0,0.33930134773254395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,128,1,float16,float16,0,0.3407680193583171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,0,1,float16,float16,0,0.33992000420888263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,128,1,float16,fp8,0,0.3401600122451782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,128,1,fp8,fp8,0,0.3288533290227254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,0,1,float16,fp8,0,0.3399413426717122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,64,0,1,fp8,fp8,0,0.32387200991312665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,128,1,float16,float16,0,0.34468265374501544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,128,1,float16,fp8,0,0.3440106709798177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,0,1,float16,float16,0,0.3447466691335042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,128,1,fp8,fp8,0,0.33718931674957275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,128,1,float16,float16,0,0.19261866807937622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,0,1,float16,fp8,0,0.34388267993927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,0,1,float16,float16,0,0.1965279976526896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,64,0,1,fp8,fp8,0,0.33582401275634766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,128,1,fp8,fp8,0,0.1977120041847229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,0,1,float16,fp8,0,0.191429336865743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,128,1,float16,fp8,0,0.1911626656850179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,64,0,1,fp8,fp8,0,0.1988853414853414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,0,1,float16,float16,0,0.17747199535369873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,128,1,float16,float16,0,0.1779573361078898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,128,1,fp8,fp8,0,0.17117865880330405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,128,1,float16,fp8,0,0.17845332622528076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,0,1,float16,fp8,0,0.17775466044743857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,64,0,1,fp8,fp8,0,0.1686720053354899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,128,1,float16,float16,0,0.17904533942540488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,0,1,float16,float16,0,0.17925333976745605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,128,1,float16,fp8,0,0.177130659421285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,128,1,fp8,fp8,0,0.1730133295059204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,0,1,float16,fp8,0,0.17893866697947183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,64,0,1,fp8,fp8,0,0.17094933986663818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,128,1,float16,float16,0,0.1791093349456787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,0,1,float16,float16,0,0.17941333850224814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,128,1,float16,fp8,0,0.1795466740926107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,128,1,fp8,fp8,0,0.17326400677363077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,0,1,fp8,fp8,0,0.17087999979654947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,64,0,1,float16,fp8,0,0.1788426637649536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,128,1,float16,float16,0,0.18040533860524496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,0,1,float16,float16,0,0.18094933032989502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,128,1,fp8,fp8,0,0.1744800011316935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,128,1,float16,fp8,0,0.18055999279022217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,0,1,float16,fp8,0,0.18051733573277792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,64,0,1,fp8,fp8,0,0.17325333754221597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,128,1,float16,float16,0,0.10477333267529805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,0,1,float16,float16,0,0.1072213351726532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,128,1,float16,fp8,0,0.10338133573532104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,128,1,fp8,fp8,0,0.10956266522407532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,0,1,float16,fp8,0,0.10533866286277771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,64,0,1,fp8,fp8,0,0.10738133390744527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,128,1,float16,float16,0,0.09707732995351155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,0,1,float16,float16,0,0.0950879951318105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,128,1,float16,fp8,0,0.09552533427874248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,128,1,fp8,fp8,0,0.09292266766230266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,0,1,float16,fp8,0,0.09639466802279155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,64,0,1,fp8,fp8,0,0.09093333284060161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,0,1,float16,float16,0,0.09526933232943217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,128,1,float16,fp8,0,0.09526933232943217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,128,1,float16,float16,0,0.09639466802279155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,128,1,fp8,fp8,0,0.0916426678498586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,0,1,float16,fp8,0,0.09624532858530681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,64,0,1,fp8,fp8,0,0.09168533484141032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,128,1,float16,float16,0,0.0953439970811208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,0,1,float16,float16,0,0.09506666660308838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,128,1,float16,fp8,0,0.09604266285896301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,128,1,fp8,fp8,0,0.09340799848238628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,0,1,float16,fp8,0,0.09644800424575806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,64,0,1,fp8,fp8,0,0.09288000067075093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,0,1,float16,float16,0,0.09637332955996196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,128,1,float16,float16,0,0.09699199597040813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,128,1,float16,fp8,0,0.09756267070770264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,128,1,fp8,fp8,0,0.09296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,0,1,float16,fp8,0,0.09727467099825542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,0,1,float16,float16,0,0.06204266846179962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,128,1,float16,float16,0,0.06165866553783417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,64,0,1,fp8,fp8,0,0.09324799974759419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,128,1,float16,fp8,0,0.06027733286221822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,128,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,0,1,float16,fp8,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,64,0,1,fp8,fp8,0,0.062650665640831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,128,1,float16,float16,0,0.05712000032265981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,0,1,float16,float16,0,0.056559999783833824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,128,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,128,1,float16,fp8,0,0.05745600163936615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,0,1,float16,fp8,0,0.05653333167235056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,64,0,1,fp8,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,128,1,float16,float16,0,0.05741333464781443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,128,1,float16,fp8,0,0.057962665955225624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,128,1,fp8,fp8,0,0.056613331039746605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,0,1,fp8,fp8,0,0.05588266750176748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,64,0,1,float16,fp8,0,0.058592001597086586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,128,1,float16,float16,0,0.05804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,0,1,float16,float16,0,0.05809600154558817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,128,1,fp8,fp8,0,0.05730666716893514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,128,1,float16,fp8,0,0.058330665032068886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,0,1,float16,fp8,0,0.05760000149408976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,128,1,float16,float16,0,0.05850133299827576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,64,0,1,fp8,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,0,1,float16,float16,0,0.05680533250172933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,128,1,float16,fp8,0,0.05677866439024607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,128,1,fp8,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,0,1,float16,fp8,0,0.0562666654586792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,64,0,1,fp8,fp8,0,0.05523733297983805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,0,1,float16,float16,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,128,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,128,1,float16,fp8,0,0.03850133220354716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,128,1,fp8,fp8,0,0.038058665891488395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,0,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,64,0,1,fp8,fp8,0,0.038906666139761605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,0,1,float16,float16,0,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,128,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,128,1,float16,fp8,0,0.036159999668598175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,0,1,fp8,fp8,0,0.03608533243338267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,64,0,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,128,1,float16,float16,0,0.03625066578388214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,0,1,float16,float16,0,0.03811733424663544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,128,1,float16,fp8,0,0.03745600084463755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,128,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,0,1,float16,fp8,0,0.03736000011364619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,128,1,float16,float16,0,0.03618133316437403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,0,1,float16,float16,0,0.03745066622893015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,64,0,1,fp8,fp8,0,0.03659199923276901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,128,1,float16,fp8,0,0.03682133307059606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,0,1,float16,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,64,0,1,fp8,fp8,0,0.03611200054486593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,128,1,float16,float16,0,0.037317333122094475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,128,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,128,1,float16,fp8,0,0.03603733330965042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,0,1,float16,fp8,0,0.03787733366092046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,64,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,0,1,float16,float16,0,0.026378666361172993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,128,1,float16,fp8,0,0.02626666675011317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,0,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,128,1,fp8,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,64,0,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,64,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,128,1,float16,float16,0,0.02585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,128,1,fp8,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,128,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,64,0,1,fp8,fp8,0,0.024314666787783306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,0,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,128,1,fp8,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,0,1,float16,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,64,0,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,128,1,float16,float16,0,0.02456533412138621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,128,1,fp8,fp8,0,0.02569599946339925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,0,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,64,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,128,1,float16,float16,0,1.214629332224528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,0,1,float16,float16,0,1.1916426817576091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,128,1,float16,fp8,0,1.2107306321461995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,128,1,fp8,fp8,0,1.1669653256734211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,0,1,float16,fp8,0,1.186463991800944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,64,0,1,fp8,fp8,0,1.1380106608072917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,128,1,float16,float16,0,1.2117386658986409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,0,1,float16,float16,0,1.1847679615020752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,128,1,float16,fp8,0,1.209381341934204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,0,1,float16,fp8,0,1.1761226654052734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,0,1,fp8,fp8,0,1.1604959964752197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,64,128,1,fp8,fp8,0,1.1951253414154053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,128,1,float16,float16,0,1.2183573246002197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,0,1,float16,float16,0,1.1897652943929036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,128,1,fp8,fp8,0,1.192799965540568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,128,1,float16,fp8,0,1.2151892979939778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,0,1,float16,fp8,0,1.1868159770965576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,64,0,1,fp8,fp8,0,1.1655466556549072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,128,1,float16,float16,0,1.2334667046864827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,0,1,float16,float16,0,1.2285652955373128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,128,1,float16,fp8,0,1.2246613502502441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,128,1,float16,float16,0,0.6625333229700724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,128,1,fp8,fp8,0,1.3009599844614665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,0,1,float16,fp8,0,1.1958399613698323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,64,0,1,fp8,fp8,0,1.27347731590271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,0,1,float16,float16,0,0.6457066535949707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,128,1,float16,fp8,0,0.6519999901453654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,128,1,fp8,fp8,0,0.6843039989471436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,0,1,float16,fp8,0,0.640720009803772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,64,0,1,fp8,fp8,0,0.6642719904581705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,128,1,float16,float16,0,0.6133706569671631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,0,1,float16,float16,0,0.6022773186365763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,128,1,float16,fp8,0,0.6119893391927084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,0,1,float16,fp8,0,0.5985386768976847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,128,1,fp8,fp8,0,0.5825546582539877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,64,0,1,fp8,fp8,0,0.564079999923706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,128,1,float16,float16,0,0.6130773226420084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,0,1,float16,float16,0,0.5997386773427328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,128,1,fp8,fp8,0,0.5953066746393839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,128,1,float16,fp8,0,0.6124586661656698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,0,1,float16,fp8,0,0.5967359940210978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,64,0,1,fp8,fp8,0,0.5777173439661661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,128,1,float16,float16,0,0.6187413136164347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,0,1,float16,float16,0,0.6033813158671061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,128,1,float16,fp8,0,0.6155413389205933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,128,1,fp8,fp8,0,0.5922400156656901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,0,1,float16,fp8,0,0.6041173140207926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,64,0,1,fp8,fp8,0,0.5767360130945841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,128,1,float16,float16,0,0.6242133378982544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,0,1,float16,float16,0,0.609221339225769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,128,1,float16,fp8,0,0.621562679608663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,128,1,fp8,fp8,0,0.6435253222783407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,0,1,float16,fp8,0,0.6055839856465658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,128,1,float16,float16,0,0.33898667494455975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,0,1,float16,float16,0,0.33244800567626953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,128,1,float16,fp8,0,0.3348906834920247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,64,0,1,fp8,fp8,0,0.624672015508016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,128,1,fp8,fp8,0,0.3520853519439697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,0,1,float16,fp8,0,0.3280426661173503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,64,0,1,fp8,fp8,0,0.3441280126571655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,128,1,float16,float16,0,0.31338133414586383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,0,1,float16,float16,0,0.3067946632703145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,128,1,fp8,fp8,0,0.2998826702435811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,0,1,float16,fp8,0,0.3047306736310323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,128,1,float16,fp8,0,0.315829336643219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,64,0,1,fp8,fp8,0,0.2916959921518962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,128,1,float16,float16,0,0.31406400601069134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,0,1,float16,float16,0,0.3080853422482808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,128,1,float16,fp8,0,0.3124213417371114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,128,1,fp8,fp8,0,0.3051466743151347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,0,1,float16,fp8,0,0.3064799904823303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,64,0,1,fp8,fp8,0,0.29813865820566815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,128,1,float16,float16,0,0.31596267223358154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,0,1,float16,float16,0,0.3088746666908264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,128,1,float16,fp8,0,0.3139680027961731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,128,1,fp8,fp8,0,0.3064746658007304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,0,1,float16,fp8,0,0.3083626627922058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,64,0,1,fp8,fp8,0,0.29761600494384766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,128,1,float16,float16,0,0.3198453386624654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,0,1,float16,float16,0,0.3141973416010539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,128,1,float16,fp8,0,0.3192746639251709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,128,1,fp8,fp8,0,0.31464000542958576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,128,1,float16,float16,0,0.18297600746154785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,0,1,float16,fp8,0,0.3127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,64,0,1,fp8,fp8,0,0.30771199862162274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,0,1,float16,float16,0,0.1792800029118856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,128,1,float16,fp8,0,0.17939200003941855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,128,1,fp8,fp8,0,0.18877333402633667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,0,1,float16,fp8,0,0.17532267173131308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,128,1,float16,float16,0,0.1667893330256144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,64,0,1,fp8,fp8,0,0.18290666739145914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,0,1,float16,float16,0,0.16408000389734903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,128,1,float16,fp8,0,0.16619199514389038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,128,1,fp8,fp8,0,0.1590826710065206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,0,1,float16,fp8,0,0.16225066781044006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,64,0,1,fp8,fp8,0,0.1544319987297058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,128,1,float16,float16,0,0.1653279960155487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,0,1,float16,float16,0,0.16184533635775247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,128,1,float16,fp8,0,0.16472533345222473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,128,1,fp8,fp8,0,0.16127999623616537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,0,1,float16,fp8,0,0.16236799955368042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,64,0,1,fp8,fp8,0,0.15588266650835672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,128,1,float16,float16,0,0.1670773426691691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,0,1,float16,float16,0,0.16365333398183188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,128,1,float16,fp8,0,0.1681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,128,1,fp8,fp8,0,0.16100266575813293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,0,1,fp8,fp8,0,0.15660267074902853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,64,0,1,float16,fp8,0,0.16331733266512552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,128,1,float16,float16,0,0.16876266400019327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,0,1,float16,float16,0,0.165093332529068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,128,1,float16,fp8,0,0.1693920095761617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,128,1,fp8,fp8,0,0.1644266645113627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,0,1,float16,fp8,0,0.16406400005022684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,64,0,1,fp8,fp8,0,0.1590986649195353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,128,1,float16,float16,0,0.09920533498128255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,0,1,float16,float16,0,0.09663466612497966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,128,1,float16,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,0,1,float16,fp8,0,0.09521067142486572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,128,1,fp8,fp8,0,0.10291199882825215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,64,0,1,fp8,fp8,0,0.10014933347702026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,128,1,float16,float16,0,0.08913600444793701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,0,1,float16,float16,0,0.08636266986529033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,128,1,fp8,fp8,0,0.08640000224113464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,128,1,float16,fp8,0,0.08897067109743755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,0,1,float16,fp8,0,0.08725866675376892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,64,0,1,fp8,fp8,0,0.08232533435026805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,128,1,float16,float16,0,0.08874133229255676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,128,1,float16,fp8,0,0.08919466535250346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,0,1,float16,float16,0,0.08627200126647949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,128,1,fp8,fp8,0,0.08647466699282329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,0,1,float16,fp8,0,0.08686932921409607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,64,0,1,fp8,fp8,0,0.08261866867542267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,128,1,float16,float16,0,0.09075733025868733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,0,1,float16,float16,0,0.087226668993632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,128,1,float16,fp8,0,0.08879466851552327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,128,1,fp8,fp8,0,0.08589866757392883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,0,1,fp8,fp8,0,0.08543999989827473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,64,0,1,float16,fp8,0,0.08732266227404277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,128,1,float16,float16,0,0.09058133761088054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,0,1,float16,float16,0,0.08772800366083781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,128,1,float16,fp8,0,0.09085866808891296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,0,1,float16,fp8,0,0.08732266227404277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,0,1,fp8,fp8,0,0.08468266328175862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,64,128,1,fp8,fp8,0,0.08910399675369263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,128,1,float16,float16,0,0.05649066468079885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,0,1,float16,float16,0,0.05605333546797434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,128,1,float16,fp8,0,0.05753066639105479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,128,1,fp8,fp8,0,0.06132266422112783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,0,1,float16,fp8,0,0.054602667689323425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,64,0,1,fp8,fp8,0,0.059989333152770996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,128,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,128,1,float16,fp8,0,0.05389333268006643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,0,1,float16,float16,0,0.05291733145713806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,0,1,float16,fp8,0,0.05438933273156484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,128,1,fp8,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,64,0,1,fp8,fp8,0,0.05077866713205973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,128,1,float16,float16,0,0.054671997825304665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,0,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,128,1,float16,fp8,0,0.05420266588528951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,128,1,fp8,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,128,1,float16,float16,0,0.05402666827042898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,0,1,fp8,fp8,0,0.051301335295041404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,64,0,1,float16,fp8,0,0.05308799942334493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,0,1,float16,float16,0,0.0537120004494985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,128,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,128,1,fp8,fp8,0,0.05457599957784017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,0,1,float16,fp8,0,0.053690666953722634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,64,0,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,0,1,float16,float16,0,0.053247998158137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,128,1,float16,float16,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,128,1,float16,fp8,0,0.05481599768002828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,128,1,fp8,fp8,0,0.053354665637016296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,0,1,float16,fp8,0,0.05231466889381409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,64,0,1,fp8,fp8,0,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,128,1,float16,float16,0,0.03531199942032496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,0,1,float16,float16,0,0.034058667719364166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,0,1,float16,fp8,0,0.03435733417669932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,128,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,64,0,1,fp8,fp8,0,0.03422933320204417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,0,1,float16,float16,0,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,128,1,float16,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,128,1,fp8,fp8,0,0.033973333736260734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,0,1,float16,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,64,0,1,fp8,fp8,0,0.03193599979082743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,128,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,0,1,float16,float16,0,0.03387733300526937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,128,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,0,1,float16,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,128,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,64,0,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,128,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,0,1,float16,float16,0,0.03385066737731298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,0,1,float16,fp8,0,0.03352533280849457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,128,1,fp8,fp8,0,0.033520000676314034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,64,0,1,fp8,fp8,0,0.032170665760835014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,0,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,128,1,float16,float16,0,0.03542399903138479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,128,1,float16,fp8,0,0.03403733422358831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,0,1,float16,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,128,1,fp8,fp8,0,0.03365866591533025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,128,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,0,1,float16,float16,0,0.022965334355831146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,64,0,1,fp8,fp8,0,0.033904001116752625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,128,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,0,1,float16,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,64,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,64,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,128,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,128,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,0,1,float16,float16,0,0.021840001145998638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,0,1,float16,fp8,0,0.021733333667119343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,64,0,1,fp8,fp8,0,0.022757334013779957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,0,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,128,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,128,1,fp8,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,128,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,128,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,0,1,fp8,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,64,0,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,128,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,0,1,float16,float16,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,128,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,64,0,1,fp8,fp8,0,0.019626667102177937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,128,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,128,1,float16,float16,0,0.020186666399240494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,128,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,128,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,64,0,1,float16,fp8,0,0.01972266659140587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,128,1,float16,float16,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,0,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,128,1,float16,float16,0,0.020901332298914593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,128,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,0,1,float16,float16,0,0.6370720068613688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,128,1,float16,fp8,0,0.6330506801605225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,128,1,float16,float16,0,0.6370453437169393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,128,1,fp8,fp8,0,0.61189866065979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,0,1,float16,fp8,0,0.6308586597442627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,64,0,1,fp8,fp8,0,0.6101866563161215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,0,1,float16,float16,0,0.6343520085016886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,128,1,float16,float16,0,0.6360479990641276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,128,1,float16,fp8,0,0.6355093320210775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,128,1,fp8,fp8,0,0.619375983874003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,0,1,float16,fp8,0,0.6327040195465088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,64,0,1,fp8,fp8,0,0.620037317276001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,128,1,float16,float16,0,0.6438080072402954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,0,1,float16,float16,0,0.6410666704177856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,128,1,float16,fp8,0,0.6349600156148275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,128,1,fp8,fp8,0,0.6143946647644043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,0,1,float16,fp8,0,0.6358720064163208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,64,0,1,fp8,fp8,0,0.613103985786438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,128,1,float16,float16,0,0.6470186710357666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,0,1,float16,float16,0,0.6461066802342733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,128,1,float16,fp8,0,0.6378986835479736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,128,1,float16,float16,0,0.35073598225911456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,0,1,float16,fp8,0,0.6402986844380697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,128,1,fp8,fp8,0,0.6642400026321411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,64,0,1,fp8,fp8,0,0.6638133525848389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,0,1,float16,float16,0,0.3513600031534831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,128,1,float16,fp8,0,0.3442506790161133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,128,1,fp8,fp8,0,0.36160000165303546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,0,1,float16,fp8,0,0.3458453416824341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,64,0,1,fp8,fp8,0,0.36001598834991455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,128,1,float16,float16,0,0.3269919951756795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,128,1,float16,fp8,0,0.32523200909296673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,0,1,float16,float16,0,0.32586665948232013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,128,1,fp8,fp8,0,0.31245332956314087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,0,1,float16,fp8,0,0.32450666030248004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,128,1,float16,float16,0,0.324560006459554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,64,0,1,fp8,fp8,0,0.3116533358891805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,0,1,float16,float16,0,0.3249280055363973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,128,1,float16,fp8,0,0.32361600796381634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,128,1,fp8,fp8,0,0.31756800413131714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,0,1,float16,fp8,0,0.3232799967130025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,64,0,1,fp8,fp8,0,0.3171306649843852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,128,1,float16,float16,0,0.32707732915878296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,0,1,float16,float16,0,0.3267893393834432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,128,1,float16,fp8,0,0.3255786697069804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,128,1,fp8,fp8,0,0.31403199831644696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,0,1,float16,fp8,0,0.32410667339960736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,64,0,1,fp8,fp8,0,0.31466132402420044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,0,1,float16,float16,0,0.3322933316230774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,128,1,float16,float16,0,0.3318079908688863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,128,1,float16,fp8,0,0.3267573316891988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,128,1,fp8,fp8,0,0.3255839943885803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,128,1,float16,float16,0,0.185589333375295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,0,1,float16,fp8,0,0.32784533500671387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,64,0,1,fp8,fp8,0,0.32474666833877563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,0,1,float16,float16,0,0.18490666151046753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,128,1,float16,fp8,0,0.18186134099960327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,128,1,fp8,fp8,0,0.18852800130844116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,0,1,float16,fp8,0,0.18163732687632242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,64,0,1,fp8,fp8,0,0.1897439956665039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,128,1,float16,float16,0,0.17139200369517008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,0,1,float16,float16,0,0.17097065846125284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,128,1,float16,fp8,0,0.1712906757990519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,128,1,fp8,fp8,0,0.16475733121236166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,0,1,float16,fp8,0,0.1715466578801473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,64,0,1,fp8,fp8,0,0.16331733266512552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,128,1,float16,float16,0,0.1723466714223226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,0,1,float16,float16,0,0.17122666041056314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,128,1,float16,fp8,0,0.1697439948717753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,128,1,fp8,fp8,0,0.16504533092180887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,0,1,fp8,fp8,0,0.16539733608563742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,128,1,float16,float16,0,0.170906662940979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,64,0,1,float16,fp8,0,0.17125866810480753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,0,1,float16,float16,0,0.17273066441218057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,128,1,float16,fp8,0,0.17269867658615112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,128,1,fp8,fp8,0,0.16491199533144632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,0,1,float16,fp8,0,0.17203199863433838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,64,0,1,fp8,fp8,0,0.1653493344783783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,128,1,float16,float16,0,0.1735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,0,1,float16,float16,0,0.17281067371368408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,128,1,float16,fp8,0,0.17336533466974893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,128,1,fp8,fp8,0,0.17001599073410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,0,1,fp8,fp8,0,0.16915200153986612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,64,0,1,float16,fp8,0,0.17250667015711466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,128,1,float16,float16,0,0.10121066371599834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,0,1,float16,float16,0,0.10134933392206828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,128,1,float16,fp8,0,0.09930666287740071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,128,1,fp8,fp8,0,0.10457600156466167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,0,1,fp8,fp8,0,0.10315199693044026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,64,0,1,float16,fp8,0,0.09896000226338704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,0,1,float16,float16,0,0.09289600451787312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,128,1,float16,float16,0,0.09268266956011455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,128,1,float16,fp8,0,0.09099733829498291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,128,1,fp8,fp8,0,0.08729599912961324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,0,1,float16,fp8,0,0.09123733639717102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,64,0,1,fp8,fp8,0,0.0885759989420573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,128,1,float16,float16,0,0.09122666716575623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,0,1,float16,float16,0,0.09156800309816997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,128,1,float16,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,128,1,fp8,fp8,0,0.08782399694124858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,0,1,float16,fp8,0,0.09089066584904988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,64,0,1,fp8,fp8,0,0.08705600102742513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,128,1,float16,float16,0,0.09202133615811665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,0,1,float16,float16,0,0.09307199716567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,128,1,float16,fp8,0,0.0925600032011668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,128,1,fp8,fp8,0,0.09020800391832988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,0,1,float16,fp8,0,0.09289066990216573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,64,0,1,fp8,fp8,0,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,128,1,float16,float16,0,0.09458133578300476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,0,1,float16,float16,0,0.09339732925097148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,128,1,float16,fp8,0,0.09342400232950847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,128,1,fp8,fp8,0,0.09129599730173747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,0,1,float16,fp8,0,0.09309867024421692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,64,0,1,fp8,fp8,0,0.09173333644866943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,0,1,float16,float16,0,0.058042665322621666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,128,1,float16,float16,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,128,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,128,1,fp8,fp8,0,0.06062399844328562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,0,1,float16,fp8,0,0.0563679983218511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,64,0,1,fp8,fp8,0,0.060458665092786155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,128,1,float16,float16,0,0.0547626664241155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,0,1,float16,float16,0,0.05458133419354757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,128,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,128,1,fp8,fp8,0,0.05358933409055074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,0,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,64,0,1,fp8,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,128,1,float16,float16,0,0.05379733443260193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,0,1,float16,float16,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,128,1,float16,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,128,1,fp8,fp8,0,0.053226664662361145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,0,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,64,0,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,128,1,float16,float16,0,0.05505600074927012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,128,1,float16,fp8,0,0.05494399865468343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,128,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,0,1,float16,fp8,0,0.05481599768002828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,64,0,1,fp8,fp8,0,0.05258133510748545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,128,1,float16,float16,0,0.05406400064627329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,0,1,float16,float16,0,0.054325332244237266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,128,1,float16,fp8,0,0.05435733497142792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,128,1,fp8,fp8,0,0.05340266724427541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,0,1,float16,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,64,0,1,fp8,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,128,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,0,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,128,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,0,1,float16,fp8,0,0.03602666656176249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,128,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,64,0,1,fp8,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,0,1,float16,float16,0,0.03477333237727483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,128,1,float16,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,128,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,0,1,float16,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,64,0,1,fp8,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,128,1,float16,float16,0,0.0346666673819224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,0,1,float16,float16,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,128,1,fp8,fp8,0,0.033999999364217125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,128,1,float16,fp8,0,0.0351200004418691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,0,1,float16,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,64,0,1,fp8,fp8,0,0.033946665624777474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,0,1,float16,float16,0,0.035071998834609985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,128,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,128,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,0,1,float16,fp8,0,0.03495466709136963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,64,0,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,128,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,0,1,float16,float16,0,0.03519999980926514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,128,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,128,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,0,1,float16,fp8,0,0.035904000202814736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,64,0,1,fp8,fp8,0,0.03527999917666117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,0,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,128,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,128,1,fp8,fp8,0,0.02380266785621643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,0,1,float16,fp8,0,0.025055999557177227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,128,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,64,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,128,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,0,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,64,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,128,1,fp8,fp8,0,0.023754666248957317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,128,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,64,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,128,1,fp8,fp8,0,0.024330665667851765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,0,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,64,0,1,fp8,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,128,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,0,1,float16,float16,0,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,0,1,float16,fp8,0,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,0,1,fp8,fp8,0,0.024688000480333965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,64,128,1,fp8,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,128,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,128,1,float16,fp8,0,0.019989332805077236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,128,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,0,1,float16,float16,0,0.018090666582187016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,128,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,128,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,64,0,1,float16,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,128,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,128,1,float16,float16,0,0.017968000223239262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,128,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,64,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,128,1,fp8,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,128,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,64,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,0,1,float16,float16,0,0.016085332880417507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,0,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,64,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,0,1,float16,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,64,0,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,64,0,1,fp8,fp8,0,0.018719999740521114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,128,1,float16,float16,0,0.44335468610127765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,0,1,float16,float16,0,0.44385067621866864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,128,1,float16,fp8,0,0.43860801060994464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,128,1,fp8,fp8,0,0.4170773426691691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,0,1,fp8,fp8,0,0.41475733121236164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,64,0,1,float16,fp8,0,0.4408320188522339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,128,1,float16,float16,0,0.4402346611022949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,0,1,float16,float16,0,0.43978134791056317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,128,1,float16,fp8,0,0.43989864985148114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,128,1,fp8,fp8,0,0.41947734355926514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,0,1,fp8,fp8,0,0.422165314356486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,64,0,1,float16,fp8,0,0.43988800048828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,128,1,float16,float16,0,0.4417653481165568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,0,1,float16,float16,0,0.44041601816813153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,128,1,float16,fp8,0,0.4378453493118286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,128,1,fp8,fp8,0,0.4182240168253581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,0,1,float16,fp8,0,0.43937067190806073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,64,0,1,fp8,fp8,0,0.4160746733347575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,128,1,float16,float16,0,0.4439839919408162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,128,1,float16,fp8,0,0.4435306787490845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,0,1,float16,float16,0,0.4462826649347941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,128,1,fp8,fp8,0,0.4266986846923828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,0,1,float16,fp8,0,0.442197322845459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,128,1,float16,float16,0,0.24171199401219687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,64,0,1,fp8,fp8,0,0.4248480002085368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,0,1,float16,float16,0,0.24172266324361166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,128,1,float16,fp8,0,0.23824000358581543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,128,1,fp8,fp8,0,0.2421226700146993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,0,1,float16,fp8,0,0.23836266994476318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,64,0,1,fp8,fp8,0,0.24101332823435465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,128,1,float16,float16,0,0.2291839917500814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,0,1,float16,float16,0,0.22975999116897583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,128,1,float16,fp8,0,0.22747200727462769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,128,1,fp8,fp8,0,0.216703991095225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,0,1,float16,fp8,0,0.2290133237838745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,64,0,1,fp8,fp8,0,0.21586666504542032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,128,1,float16,float16,0,0.22779200474421182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,0,1,float16,float16,0,0.2282080054283142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,128,1,float16,fp8,0,0.22854934136072794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,128,1,fp8,fp8,0,0.2172693411509196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,0,1,float16,fp8,0,0.22701332966486612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,64,0,1,fp8,fp8,0,0.2168160080909729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,128,1,float16,float16,0,0.228928009668986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,0,1,float16,float16,0,0.22734934091567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,128,1,float16,fp8,0,0.22874667247136435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,128,1,fp8,fp8,0,0.21635200579961142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,0,1,float16,fp8,0,0.22750399510065714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,64,0,1,fp8,fp8,0,0.21650665998458862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,128,1,float16,float16,0,0.23054399092992148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,0,1,float16,float16,0,0.23042132457097372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,128,1,float16,fp8,0,0.22762133677800497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,128,1,fp8,fp8,0,0.22005333503087363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,0,1,float16,fp8,0,0.22894932826360068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,64,0,1,fp8,fp8,0,0.21988266706466675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,128,1,float16,float16,0,0.12797866264979044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,0,1,float16,float16,0,0.1272533337275187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,128,1,float16,fp8,0,0.12799466649691263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,128,1,fp8,fp8,0,0.12864533066749573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,0,1,float16,fp8,0,0.127210666735967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,64,0,1,fp8,fp8,0,0.12944533427556357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,128,1,float16,float16,0,0.12166933218638103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,0,1,float16,float16,0,0.1209493378798167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,128,1,float16,fp8,0,0.12081600228945415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,128,1,fp8,fp8,0,0.11191999912261963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,0,1,float16,fp8,0,0.12108266353607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,64,0,1,fp8,fp8,0,0.11371733744939168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,128,1,float16,float16,0,0.12076800068219502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,0,1,float16,float16,0,0.1206773320833842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,128,1,float16,fp8,0,0.12001066406567891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,128,1,fp8,fp8,0,0.11312533418337505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,0,1,float16,fp8,0,0.12151466806729634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,64,0,1,fp8,fp8,0,0.1132586697737376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,0,1,float16,float16,0,0.12024000287055969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,128,1,float16,float16,0,0.12181333700815837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,128,1,float16,fp8,0,0.12058666348457336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,128,1,fp8,fp8,0,0.11402133107185364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,0,1,float16,fp8,0,0.12034133076667786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,64,0,1,fp8,fp8,0,0.11409599582354228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,128,1,float16,float16,0,0.12218133608500163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,0,1,float16,float16,0,0.12226133545239766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,128,1,float16,fp8,0,0.12173333764076233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,128,1,fp8,fp8,0,0.11703466375668843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,0,1,float16,fp8,0,0.12173866232236226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,64,0,1,fp8,fp8,0,0.11619200309117635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,128,1,float16,float16,0,0.07259200016657512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,0,1,float16,float16,0,0.07177066802978516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,128,1,float16,fp8,0,0.0718506673971812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,128,1,fp8,fp8,0,0.07284266750017802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,0,1,float16,fp8,0,0.0720960001150767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,64,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,128,1,float16,float16,0,0.06839466591676076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,0,1,float16,float16,0,0.06947733461856842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,128,1,float16,fp8,0,0.0689333329598109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,128,1,fp8,fp8,0,0.06508266429106395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,0,1,float16,fp8,0,0.06875200072924297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,64,0,1,fp8,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,128,1,float16,float16,0,0.07011199990908305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,0,1,float16,float16,0,0.06849599877993266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,128,1,fp8,fp8,0,0.06548800071080525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,0,1,float16,fp8,0,0.06921066840489705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,0,1,fp8,fp8,0,0.06449600060780843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,64,128,1,float16,fp8,0,0.07038400073846181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,128,1,float16,float16,0,0.0682239979505539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,0,1,float16,float16,0,0.06984533369541168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,128,1,float16,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,0,1,float16,fp8,0,0.0698880006869634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,0,1,fp8,fp8,0,0.06634133557478587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,64,128,1,fp8,fp8,0,0.06783466537793477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,128,1,float16,float16,0,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,0,1,float16,float16,0,0.0690773328145345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,128,1,float16,fp8,0,0.06881600121657054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,0,1,float16,fp8,0,0.06844800213972728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,0,1,fp8,fp8,0,0.06623999774456024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,64,128,1,fp8,fp8,0,0.06784533460934956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,128,1,float16,float16,0,0.04173333446184794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,0,1,float16,float16,0,0.04351999859015147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,128,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,0,1,float16,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,0,1,fp8,fp8,0,0.04314666489760081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,128,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,64,128,1,fp8,fp8,0,0.043178667624791466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,0,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,128,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,128,1,fp8,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,0,1,float16,fp8,0,0.042778665820757546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,64,0,1,fp8,fp8,0,0.04090133309364319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,0,1,float16,float16,0,0.04165866722663244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,128,1,float16,float16,0,0.041834667325019836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,128,1,fp8,fp8,0,0.04082666585842768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,0,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,0,1,fp8,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,64,128,1,float16,fp8,0,0.042266666889190674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,128,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,128,1,fp8,fp8,0,0.04035733391841253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,0,1,float16,fp8,0,0.04124266654253006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,64,0,1,fp8,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,0,1,float16,float16,0,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,128,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,128,1,float16,float16,0,0.04254400233427683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,128,1,fp8,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,0,1,float16,fp8,0,0.0415040006240209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,64,0,1,fp8,fp8,0,0.04104000081618627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,128,1,float16,float16,0,0.029029332101345062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,128,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,128,1,float16,fp8,0,0.03031466652949651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,0,1,float16,fp8,0,0.027973333994547527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,64,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,0,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,128,1,float16,float16,0,0.028229333460330963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,128,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,128,1,fp8,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,0,1,float16,fp8,0,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,64,0,1,fp8,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,128,1,float16,float16,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,128,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,0,1,float16,float16,0,0.027813332776228588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,0,1,float16,fp8,0,0.027845333019892376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,64,0,1,fp8,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,128,1,float16,float16,0,0.02807466685771942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,128,1,float16,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,128,1,fp8,fp8,0,0.02789866675933202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,0,1,float16,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,64,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,128,1,float16,float16,0,0.028058665494124096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,128,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,128,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,64,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,128,1,float16,float16,0,0.019941333681344986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,128,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,128,1,float16,float16,0,0.01988799994190534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,0,1,float16,float16,0,0.019962667177120846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,128,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,0,1,float16,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,64,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,128,1,float16,fp8,0,0.019845332950353622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,128,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,64,0,1,fp8,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,128,1,float16,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,128,1,fp8,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,128,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,128,1,float16,float16,0,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,128,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,128,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,64,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,128,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,64,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,128,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,64,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,128,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,0,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,128,1,fp8,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,128,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,128,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,64,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,128,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,0,1,float16,float16,0,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,128,1,float16,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,0,1,float16,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,64,0,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,128,1,float16,float16,0,0.016501333564519882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,128,1,float16,float16,0,0.34249599774678546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,0,1,float16,float16,0,0.34379732608795166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,128,1,float16,fp8,0,0.34223465124766034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,128,1,fp8,fp8,0,0.32253867387771606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,0,1,float16,fp8,0,0.34135464827219647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,64,0,1,fp8,fp8,0,0.32364267110824585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,128,1,float16,float16,0,0.34307201703389484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,0,1,float16,float16,0,0.3433493375778198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,128,1,float16,fp8,0,0.3430773417154948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,128,1,fp8,fp8,0,0.3232799967130025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,0,1,float16,fp8,0,0.3420373201370239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,64,0,1,fp8,fp8,0,0.3234293262163798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,128,1,float16,float16,0,0.34256001313527423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,0,1,float16,float16,0,0.3427199920018514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,128,1,float16,fp8,0,0.34142935276031494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,128,1,fp8,fp8,0,0.32046933968861896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,0,1,float16,fp8,0,0.34093332290649414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,64,0,1,fp8,fp8,0,0.3221279978752136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,128,1,float16,float16,0,0.34228265285491943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,0,1,float16,float16,0,0.34332799911499023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,128,1,float16,fp8,0,0.34346667925516766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,0,1,float16,fp8,0,0.34253867467244464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,128,1,fp8,fp8,0,0.32786667346954346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,64,0,1,fp8,fp8,0,0.3259200056393941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,128,1,float16,float16,0,0.18572266896565756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,0,1,float16,float16,0,0.18532800674438477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,128,1,float16,fp8,0,0.1856266657511393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,128,1,fp8,fp8,0,0.1825973391532898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,0,1,float16,fp8,0,0.18479466438293457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,64,0,1,fp8,fp8,0,0.18255466222763062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,128,1,float16,float16,0,0.17776000499725342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,0,1,float16,float16,0,0.1784800092379252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,128,1,fp8,fp8,0,0.16676799456278482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,128,1,float16,fp8,0,0.17754133542378744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,0,1,float16,fp8,0,0.1774079998334249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,64,0,1,fp8,fp8,0,0.1665173371632894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,128,1,float16,float16,0,0.17695999145507812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,0,1,float16,float16,0,0.17749333381652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,128,1,float16,fp8,0,0.17903467019399008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,128,1,fp8,fp8,0,0.16683199008305868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,0,1,float16,fp8,0,0.1768853267033895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,64,0,1,fp8,fp8,0,0.16615466276804605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,0,1,float16,float16,0,0.17719467480977377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,128,1,float16,float16,0,0.17950934171676636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,128,1,float16,fp8,0,0.17706133921941122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,128,1,fp8,fp8,0,0.16708266735076904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,0,1,float16,fp8,0,0.17706666390101114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,64,0,1,fp8,fp8,0,0.1668213407198588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,128,1,float16,float16,0,0.17839999993642172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,0,1,float16,float16,0,0.1801919937133789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,128,1,float16,fp8,0,0.1790026624997457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,128,1,fp8,fp8,0,0.16834133863449097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,0,1,float16,fp8,0,0.1790133317311605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,128,1,float16,float16,0,0.09937066833178203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,0,1,float16,float16,0,0.09922132889429729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,64,0,1,fp8,fp8,0,0.17113600174585977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,128,1,float16,fp8,0,0.09981866677602132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,128,1,fp8,fp8,0,0.0995360016822815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,0,1,float16,fp8,0,0.0993173321088155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,128,1,float16,float16,0,0.09672000010808308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,64,0,1,fp8,fp8,0,0.09968533118565877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,0,1,float16,float16,0,0.09722666939099629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,128,1,float16,fp8,0,0.09725333253542583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,128,1,fp8,fp8,0,0.09129599730173747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,0,1,float16,fp8,0,0.09758399923642476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,64,0,1,fp8,fp8,0,0.09100799759229024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,128,1,float16,float16,0,0.09724799791971843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,0,1,float16,float16,0,0.09710933764775594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,128,1,float16,fp8,0,0.0958720048268636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,128,1,fp8,fp8,0,0.09107733766237895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,0,1,float16,fp8,0,0.09713066617647807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,64,0,1,fp8,fp8,0,0.09215999643007915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,128,1,float16,float16,0,0.09619733691215515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,0,1,float16,float16,0,0.09732266267140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,128,1,float16,fp8,0,0.0972160001595815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,128,1,fp8,fp8,0,0.09289600451787312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,0,1,float16,fp8,0,0.0963253378868103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,64,0,1,fp8,fp8,0,0.09243200222651164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,0,1,float16,float16,0,0.09619200229644775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,128,1,float16,fp8,0,0.0956160028775533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,128,1,float16,float16,0,0.09738133351008098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,0,1,float16,fp8,0,0.09603200356165568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,128,1,fp8,fp8,0,0.09160533547401428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,64,0,1,fp8,fp8,0,0.09169600407282512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,128,1,float16,float16,0,0.05657599866390228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,0,1,float16,float16,0,0.056261335810025535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,128,1,float16,fp8,0,0.056703999638557434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,128,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,0,1,float16,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,64,0,1,fp8,fp8,0,0.05555200080076853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,128,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,0,1,float16,float16,0,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,128,1,float16,fp8,0,0.05638400216897329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,128,1,fp8,fp8,0,0.05417066812515259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,0,1,float16,fp8,0,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,128,1,float16,float16,0,0.055914665261904396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,0,1,float16,float16,0,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,64,0,1,fp8,fp8,0,0.054645334680875145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,128,1,float16,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,128,1,fp8,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,0,1,float16,fp8,0,0.05676800012588501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,64,0,1,fp8,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,128,1,float16,float16,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,0,1,float16,float16,0,0.05676800012588501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,128,1,float16,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,128,1,fp8,fp8,0,0.0543093333641688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,0,1,float16,fp8,0,0.056943997740745544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,64,0,1,fp8,fp8,0,0.05427733560403188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,128,1,float16,float16,0,0.05625066657861074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,128,1,float16,fp8,0,0.05618133147557577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,128,1,fp8,fp8,0,0.05442666510740916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,0,1,float16,fp8,0,0.05625066657861074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,128,1,float16,float16,0,0.03521066655715307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,64,0,1,fp8,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,0,1,float16,float16,0,0.034927998979886375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,128,1,fp8,fp8,0,0.03391999999682108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,0,1,float16,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,64,0,1,fp8,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,128,1,float16,float16,0,0.034128000338872276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,0,1,float16,float16,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,128,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,64,0,1,fp8,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,128,1,float16,float16,0,0.035455999275048576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,128,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,0,1,float16,float16,0,0.03475733349720637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,128,1,fp8,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,64,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,128,1,float16,float16,0,0.03373866776625315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,0,1,float16,float16,0,0.03580799947182337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,128,1,float16,fp8,0,0.033904001116752625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,128,1,fp8,fp8,0,0.033402666449546814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,0,1,float16,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,128,1,float16,float16,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,0,1,float16,float16,0,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,64,0,1,fp8,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,128,1,float16,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,128,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,0,1,float16,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,64,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,128,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,64,0,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,128,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,128,1,float16,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,0,1,float16,fp8,0,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,64,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,128,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,128,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,0,1,float16,fp8,0,0.023973333338896435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,128,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,64,0,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,128,1,fp8,fp8,0,0.024346667031447094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,128,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,0,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,64,0,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,128,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,0,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,128,1,float16,fp8,0,0.026650667190551758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,128,1,fp8,fp8,0,0.024117333193620045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,0,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,64,0,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,0,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,0,1,float16,float16,0,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,128,1,float16,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,128,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,64,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,0,1,float16,float16,0,0.0185759998857975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,128,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,0,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,64,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,128,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,64,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,128,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,64,0,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,128,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,128,1,float16,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,128,1,fp8,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,128,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,128,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,0,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,64,0,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,128,1,float16,float16,0,0.2992159922917684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,0,1,float16,float16,0,0.2975359956423442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,128,1,float16,fp8,0,0.2999253273010254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,128,1,fp8,fp8,0,0.2754879991213481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,0,1,fp8,fp8,0,0.2755253314971924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,64,0,1,float16,fp8,0,0.29781333605448407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,128,1,float16,float16,0,0.29919467369715375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,0,1,float16,float16,0,0.29808000723520917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,128,1,fp8,fp8,0,0.27531200647354126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,128,1,float16,fp8,0,0.29920534292856854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,0,1,float16,fp8,0,0.2985866665840149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,64,0,1,fp8,fp8,0,0.2754986683527629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,0,1,float16,float16,0,0.2985440095265706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,128,1,float16,float16,0,0.30002667506535846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,128,1,float16,fp8,0,0.2983520030975342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,128,1,fp8,fp8,0,0.27532800038655597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,0,1,float16,fp8,0,0.2985759973526001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,64,0,1,fp8,fp8,0,0.27685866753260296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,128,1,float16,float16,0,0.29868799448013306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,0,1,float16,float16,0,0.29892265796661377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,128,1,float16,fp8,0,0.29873599608739215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,128,1,fp8,fp8,0,0.27725332975387573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,128,1,float16,float16,0,0.15933332840601602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,0,1,fp8,fp8,0,0.2779680093129476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,64,0,1,float16,fp8,0,0.3002240061759949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,0,1,float16,float16,0,0.16058133045832315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,128,1,float16,fp8,0,0.15890133380889893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,128,1,fp8,fp8,0,0.15498666961987814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,0,1,float16,fp8,0,0.16063466668128967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,64,0,1,fp8,fp8,0,0.15292800466219583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,0,1,float16,float16,0,0.15652799606323242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,128,1,float16,float16,0,0.15788267056147257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,128,1,float16,fp8,0,0.15661333004633585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,128,1,fp8,fp8,0,0.14563199877738953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,0,1,fp8,fp8,0,0.14614933729171753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,64,0,1,float16,fp8,0,0.157231996456782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,128,1,float16,float16,0,0.1569760044415792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,0,1,float16,float16,0,0.15797332922617593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,128,1,float16,fp8,0,0.15702399611473083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,128,1,fp8,fp8,0,0.1462399959564209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,0,1,float16,fp8,0,0.15707199772198996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,64,0,1,fp8,fp8,0,0.14538133144378662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,128,1,float16,float16,0,0.15684800346692404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,0,1,float16,float16,0,0.1562933325767517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,128,1,float16,fp8,0,0.1572106679280599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,128,1,fp8,fp8,0,0.14619732896486917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,0,1,float16,fp8,0,0.15678933262825012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,64,0,1,fp8,fp8,0,0.14678399761517844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,128,1,float16,float16,0,0.15730667114257812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,0,1,float16,float16,0,0.15615466237068176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,128,1,float16,fp8,0,0.1567093332608541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,128,1,float16,float16,0,0.08685866991678874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,0,1,float16,fp8,0,0.157231996456782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,128,1,fp8,fp8,0,0.14685333768526712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,64,0,1,fp8,fp8,0,0.14643733700116476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,0,1,float16,float16,0,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,128,1,fp8,fp8,0,0.08275733391443889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,128,1,float16,fp8,0,0.08711999654769897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,0,1,float16,fp8,0,0.08727999528249104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,64,0,1,fp8,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,128,1,float16,float16,0,0.08498666683832805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,0,1,float16,float16,0,0.0860693355401357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,128,1,float16,fp8,0,0.08548266688982646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,128,1,fp8,fp8,0,0.08044266700744629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,0,1,float16,fp8,0,0.08679466446240743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,64,0,1,fp8,fp8,0,0.08021866778532664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,128,1,float16,float16,0,0.08528000116348267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,0,1,float16,float16,0,0.08477333188056946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,128,1,float16,fp8,0,0.08673600355784099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,128,1,fp8,fp8,0,0.0795253316561381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,0,1,fp8,fp8,0,0.08045866588751475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,64,0,1,float16,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,128,1,float16,float16,0,0.0867733359336853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,128,1,float16,fp8,0,0.08650133013725281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,0,1,float16,float16,0,0.08517866333325703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,0,1,float16,fp8,0,0.08675199747085571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,128,1,fp8,fp8,0,0.08074666559696198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,64,0,1,fp8,fp8,0,0.08085866769154866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,0,1,float16,float16,0,0.08514133095741272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,128,1,float16,fp8,0,0.08725866675376892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,128,1,float16,float16,0,0.08540266752243042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,128,1,fp8,fp8,0,0.08045866588751475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,0,1,float16,fp8,0,0.08526399731636047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,128,1,float16,float16,0,0.05060799916585287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,64,0,1,fp8,fp8,0,0.0812960018714269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,0,1,float16,float16,0,0.05005866785844167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,128,1,float16,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,0,1,float16,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,128,1,fp8,fp8,0,0.04898133377234141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,64,0,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,128,1,float16,float16,0,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,0,1,float16,float16,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,128,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,0,1,float16,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,64,0,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,128,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,128,1,float16,fp8,0,0.05005866785844167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,0,1,float16,float16,0,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,0,1,float16,fp8,0,0.04979733129342397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,128,1,fp8,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,64,0,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,128,1,float16,float16,0,0.050479998191197716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,0,1,float16,float16,0,0.04971200227737427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,128,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,128,1,fp8,fp8,0,0.047882666190465294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,0,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,64,0,1,fp8,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,128,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,0,1,float16,float16,0,0.05012266834576925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,128,1,float16,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,128,1,fp8,fp8,0,0.04809066653251648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,0,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,64,0,1,float16,fp8,0,0.05067199965318044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,128,1,fp8,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,128,1,float16,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,0,1,float16,fp8,0,0.031898667414983116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,64,0,1,fp8,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,128,1,float16,float16,0,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,128,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,128,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,64,0,1,fp8,fp8,0,0.031061333914597828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,128,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,128,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,128,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,64,0,1,fp8,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,128,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,0,1,float16,float16,0,0.032042667269706726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,128,1,fp8,fp8,0,0.030048000315825146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,64,0,1,fp8,fp8,0,0.031013332307338715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,128,1,float16,float16,0,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,128,1,fp8,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,64,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,128,1,float16,float16,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,128,1,fp8,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,64,0,1,fp8,fp8,0,0.022917332748572033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,0,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,128,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,128,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,64,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,128,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,128,1,float16,fp8,0,0.023754666248957317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,0,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,64,0,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,128,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,128,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,0,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,128,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,0,1,float16,float16,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,64,0,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,128,1,float16,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,128,1,fp8,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,0,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,64,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,0,1,float16,float16,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,0,1,float16,fp8,0,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,128,1,fp8,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,64,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,128,1,float16,float16,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,128,1,float16,fp8,0,0.018063999712467194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,64,128,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,128,1,float16,float16,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,0,1,float16,float16,0,0.018922666708628338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,128,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,0,1,float16,fp8,0,0.018543999642133713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,0,1,float16,float16,0,0.017957333475351334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,128,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,0,1,float16,float16,0,0.016634666671355564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,128,1,fp8,fp8,0,0.016714667280515034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,64,0,1,fp8,fp8,0,0.020031999796628952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,128,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,128,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,128,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,128,1,fp8,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,64,0,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,128,1,float16,float16,0,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,128,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,128,1,float16,float16,0,0.016538667182127636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,128,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,0,1,float16,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,0,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,128,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,64,0,1,fp8,fp8,0,0.015957333147525787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,128,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,128,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,0,0.25699732700983685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,0,0.25672000646591187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,0,0.25543999671936035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,128,1,fp8,fp8,0,0.23238933086395264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,0,0.2568693359692891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,64,0,1,fp8,fp8,0,0.23318399985631308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,0,0.25597333908081055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,0,0.2551093300183614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,0,0.2552693287531535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,128,1,fp8,fp8,0,0.23218133052190146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,0,0.25683732827504474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,64,0,1,fp8,fp8,0,0.2329919934272766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,0,0.25679999589920044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,0,0.25487999121348065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,0,0.2574399908383687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,128,1,fp8,fp8,0,0.2323573430379232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,0,0.2549813389778137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,64,0,1,fp8,fp8,0,0.2327786684036255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,0,0.2561013301213582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,0,0.25600532690684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,0,0.2579946716626485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,128,1,fp8,fp8,0,0.23245867093404135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,0,0.25571199258168537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,128,1,float16,float16,0,0.13674666484196982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,64,0,1,fp8,fp8,0,0.2322346568107605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,0,0.1344373325506846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,128,1,float16,fp8,0,0.13530666629473367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,0,0.1343946655591329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,128,1,fp8,fp8,0,0.12425067027409871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,64,0,1,fp8,fp8,0,0.12385599811871846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,0,0.13544533650080362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,0,0.1341546674569448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,0,0.1366986632347107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,128,1,fp8,fp8,0,0.12429333726565044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,0,0.13572800159454346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,64,0,1,fp8,fp8,0,0.12164266904195149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,0,0.13450133800506592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,0,0.13596799969673157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,0,0.1341653366883596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,128,1,fp8,fp8,0,0.12194666266441345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,0,0.13409066200256348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,0,0.13582932949066162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,64,0,1,fp8,fp8,0,0.1241439978281657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,0,0.134853333234787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,0,0.1360640029112498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,128,1,fp8,fp8,0,0.1237546702226003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,0,0.13608533143997192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,64,0,1,fp8,fp8,0,0.12384532888730367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,0,0.1341386636098226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,0,0.13556266824404398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,0,0.13612799843152365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,0,0.1363200048605601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,128,1,fp8,fp8,0,0.12232533097267151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,64,0,1,fp8,fp8,0,0.12377066413561504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,128,1,float16,float16,0,0.07441066702206929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,0,0.0749066670735677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,128,1,float16,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,128,1,fp8,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,0,0.07520000139872234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,64,0,1,fp8,fp8,0,0.06858666737874348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,0,0.07435200115044911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,0,0.07460266848405202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,128,1,fp8,fp8,0,0.06829866766929626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,0,1,fp8,fp8,0,0.06845866640408833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,0,0.07515733440717061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,0,0.07468800246715546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,0,0.07455466687679291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,0,0.07514666517575581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,128,1,fp8,fp8,0,0.06877333422501881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,0,0.07457066575686137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,64,0,1,fp8,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,0,0.07426666716734569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,0,0.07467733323574066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,128,1,fp8,fp8,0,0.0689333329598109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,0,0.07464533547560374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,64,0,1,fp8,fp8,0,0.06830933193365733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,0,0.075162669022878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,0,0.0746666689713796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,0,0.07494399944941203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,128,1,fp8,fp8,0,0.06865066786607106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,64,0,1,fp8,fp8,0,0.06876266499360402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,0,0.04561600089073181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,128,1,float16,float16,0,0.04614933331807455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,128,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,0,0.043935999274253845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,0,0.04409599800904592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,0,0.04457066456476847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,128,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,0,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,0,0.04388799766699473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,0,0.04420266548792521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,0,0.04389866689840952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,128,1,fp8,fp8,0,0.04165333261092504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,64,0,1,fp8,fp8,0,0.0421973317861557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,0,0.045466666420300804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,128,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,0,0.04636266827583313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,64,0,1,fp8,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,0,0.04398400088151296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,0,0.04576533536116282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,128,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,0,0.04577599962552389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,128,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,64,0,1,fp8,fp8,0,0.0423573354880015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,128,1,fp8,fp8,0,0.02762666592995326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,0,0.02997333308060964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,64,128,1,fp8,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,128,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,64,0,1,fp8,fp8,0,0.02864533414443334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,0,0.029338667790095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,0,0.029904000461101532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,0,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,64,0,1,fp8,fp8,0,0.02792000025510788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,64,0,1,fp8,fp8,0,0.029781334102153778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,0,0.021925332645575207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,128,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,64,0,1,fp8,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,128,1,fp8,fp8,0,0.02239999920129776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,128,1,fp8,fp8,0,0.02189333240191142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,64,0,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,128,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,64,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,128,1,fp8,fp8,0,0.021759999295075733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,64,0,1,fp8,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,128,1,float16,fp8,0,0.017808000246683758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,64,0,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,0,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,128,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,0,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,128,1,fp8,fp8,0,0.01850133389234543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,0,0.01844266677896182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,64,0,1,fp8,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,128,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,128,1,fp8,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,64,0,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,0,0.016544000556071598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,64,0,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,64,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,128,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,64,0,1,fp8,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,0,0.015850666910409927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,0,0.01600533351302147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,128,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,float16,0,2.4885279337565103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,128,1,float16,fp8,0,2.511685371398926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,128,1,fp8,fp8,0,2.290224075317383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,float16,0,2.5030879974365234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,float16,0,12.78607432047526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,128,1,float16,fp8,0,2.526250680287679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,0,1,float16,fp8,0,12.829264322916666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,64,0,1,fp8,fp8,0,11.046379089355469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,128,1,fp8,fp8,0,2.3148159980773926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,float16,0,2.5398240089416504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,float16,0,12.861146291097006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,128,1,float16,fp8,0,2.5621493657430015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,128,1,fp8,fp8,0,2.3488693237304688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,0,1,fp8,fp8,0,11.050453186035156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,64,0,1,float16,fp8,0,12.8711306254069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,float16,0,1.4551733334859211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,float16,0,12.907024383544922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,128,1,float16,fp8,0,1.492106596628825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,float16,0,6.740655899047852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,128,1,fp8,fp8,0,1.394506613413493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,0,1,fp8,fp8,0,11.115557352701822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,64,0,1,float16,fp8,0,12.89794667561849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,float16,0,1.2930080095926921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,128,1,float16,fp8,0,1.3047040303548176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,0,1,fp8,fp8,0,5.81498654683431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,128,1,fp8,fp8,0,1.1904799938201904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,64,0,1,float16,fp8,0,6.758810679117839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,float16,0,1.295967976252238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,float16,0,6.502053578694661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,128,1,float16,fp8,0,1.3153599898020427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,0,1,fp8,fp8,0,5.606565475463867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,128,1,fp8,fp8,0,1.2012426853179932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,64,0,1,float16,fp8,0,6.514453252156575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,float16,0,1.31168532371521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,float16,0,6.512282689412435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,128,1,float16,fp8,0,1.3265973726908367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,128,1,fp8,fp8,0,1.2196906407674153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,0,1,fp8,fp8,0,5.616325378417969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,64,0,1,float16,fp8,0,6.5093123118082685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,float16,0,0.802074670791626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,float16,0,6.53877321879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,128,1,float16,fp8,0,0.8214026292165121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,128,1,fp8,fp8,0,0.7750186920166016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,float16,0,3.473168055216471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,0,1,fp8,fp8,0,5.635653177897136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,64,0,1,float16,fp8,0,6.55410639444987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,float16,0,0.7255093256632487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,128,1,float16,fp8,0,0.7327306270599365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,0,1,fp8,fp8,0,3.0253012975056968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,64,0,1,float16,fp8,0,3.4994080861409507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,128,1,fp8,fp8,0,0.6768799622853597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,float16,0,0.7290666898091634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,float16,0,3.361973444620768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,128,1,float16,fp8,0,0.7353599866231283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,0,1,fp8,fp8,0,2.914485295613607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,128,1,fp8,fp8,0,0.6792639891306559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,64,0,1,float16,fp8,0,3.3653761545817056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,float16,0,0.7322080135345459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,float16,0,3.3720693588256836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,128,1,float16,fp8,0,0.7427732944488525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,0,1,fp8,fp8,0,2.922138532002767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,64,0,1,float16,fp8,0,3.379317283630371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,128,1,fp8,fp8,0,0.6871893405914307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,float16,0,0.5578666528066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,float16,0,3.3766613006591797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,128,1,float16,fp8,0,0.5589653253555298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,float16,0,1.9359199206034343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,0,1,fp8,fp8,0,2.926682790120443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,128,1,fp8,fp8,0,0.5258293151855469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,64,0,1,float16,fp8,0,3.3900534311930337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,float16,0,0.5600533485412598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,0,1,fp8,fp8,0,1.6824053128560383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,128,1,float16,fp8,0,0.5586719910303751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,64,0,1,float16,fp8,0,1.9376640319824219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,float16,0,1.9179573059082031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,128,1,fp8,fp8,0,0.5215679804484049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,float16,0,0.5590986808141073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,0,1,fp8,fp8,0,1.6813066800435383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,128,1,float16,fp8,0,0.5561226606369019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,64,0,1,float16,fp8,0,1.919365406036377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,128,1,fp8,fp8,0,0.5250613292058309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,float16,0,1.9134079615275066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,float16,0,0.5569493373235067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,0,1,float16,fp8,0,1.9225227038065593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,64,0,1,fp8,fp8,0,1.6816693941752117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,128,1,float16,fp8,0,0.5605226755142212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,128,1,fp8,fp8,0,0.5259253184000651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,float16,0,1.9169707298278809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,0,1,float16,fp8,0,1.923418680826823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,64,0,1,fp8,fp8,0,1.682586669921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,float16,0,1.8437973658243816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,128,1,float16,fp8,0,1.8616426785786946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,128,1,fp8,fp8,0,1.693621317545573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,float16,0,1.8552853266398113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,float16,0,7.599231719970703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,0,1,fp8,fp8,0,6.577413558959961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,64,0,1,float16,fp8,0,7.630442937215169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,128,1,float16,fp8,0,1.8763945897420247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,128,1,fp8,fp8,0,1.7120213508605957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,float16,0,7.631903966267903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,float16,0,1.8800800641377766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,128,1,float16,fp8,0,1.8990772565205891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,0,1,fp8,fp8,0,6.589653650919597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,64,0,1,float16,fp8,0,7.647514979044597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,128,1,fp8,fp8,0,1.7406080563863118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,float16,0,1.0929280122121174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,float16,0,7.667519887288411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,128,1,float16,fp8,0,1.1208960215250652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,float16,0,4.05840524037679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,128,1,fp8,fp8,0,1.0460586547851562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,0,1,fp8,fp8,0,6.631541570027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,64,0,1,float16,fp8,0,7.675498962402344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,float16,0,0.9734933376312256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,128,1,float16,fp8,0,0.9833652973175049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,0,1,fp8,fp8,0,3.5274667739868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,64,0,1,float16,fp8,0,4.079306602478027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,128,1,fp8,fp8,0,0.8989066282908121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,float16,0,3.879269282023112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,float16,0,0.9782346884409586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,128,1,float16,fp8,0,0.9885546366373698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,0,1,fp8,fp8,0,3.3674399058024087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,64,0,1,float16,fp8,0,3.893813451131185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,128,1,fp8,fp8,0,0.9046133359273275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,float16,0,3.8902880350748696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,float16,0,0.9879999955495199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,128,1,float16,fp8,0,1.0006826718648274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,0,1,fp8,fp8,0,3.370800018310547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,64,0,1,float16,fp8,0,3.9062506357828775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,128,1,fp8,fp8,0,0.918346643447876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,float16,0,0.6053173144658407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,float16,0,3.908357302347819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,128,1,float16,fp8,0,0.6218560139338175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,0,1,fp8,fp8,0,3.3966347376505532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,64,0,1,float16,fp8,0,3.92902406056722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,float16,0,2.1156533559163413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,128,1,fp8,fp8,0,0.5878719886144003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,float16,0,0.5476746559143066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,128,1,float16,fp8,0,0.5538666645685831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,0,1,fp8,fp8,0,1.8552746772766113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,64,0,1,float16,fp8,0,2.132256031036377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,float16,0,2.029690742492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,128,1,fp8,fp8,0,0.5123253266016642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,float16,0,0.550816019376119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,128,1,float16,fp8,0,0.5585013230641683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,0,1,fp8,fp8,0,1.7742932637532551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,64,0,1,float16,fp8,0,2.039365291595459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,float16,0,2.0397493044535318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,128,1,fp8,fp8,0,0.5168106555938721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,float16,0,0.5559786558151245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,0,1,fp8,fp8,0,1.7753067016601562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,64,0,1,float16,fp8,0,2.0429066022237143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,128,1,float16,fp8,0,0.5628960132598877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,128,1,fp8,fp8,0,0.5214773416519165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,float16,0,2.046010653177897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,float16,0,0.4230773448944092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,128,1,float16,fp8,0,0.4233226776123047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,0,1,fp8,fp8,0,1.7849653561909993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,64,0,1,float16,fp8,0,2.0659947395324707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,float16,0,1.207045316696167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,128,1,fp8,fp8,0,0.3982880115509033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,float16,0,0.42156799634297687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,0,1,float16,fp8,0,1.209455966949463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,64,0,1,fp8,fp8,0,1.056501309076945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,128,1,float16,fp8,0,0.4248533248901367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,float16,0,1.1901226838429768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,128,1,fp8,fp8,0,0.3984053134918213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,float16,0,0.421178658803304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,0,1,float16,fp8,0,1.1975093682607014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,64,0,1,fp8,fp8,0,1.0562079747517903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,128,1,float16,fp8,0,0.42182934284210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,float16,0,1.1971573034922283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,128,1,fp8,fp8,0,0.39821334679921466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,float16,0,0.4245973428090413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,0,1,float16,fp8,0,1.198031981786092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,64,0,1,fp8,fp8,0,1.053381363550822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,128,1,float16,fp8,0,0.4251946608225505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,float16,0,1.1985546747843425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,128,1,fp8,fp8,0,0.3991146485010783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,0,1,fp8,fp8,0,1.0561707019805908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,64,0,1,float16,fp8,0,1.1953173478444417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,float16,0,1.533690611521403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,128,1,float16,fp8,0,1.549455960591634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,128,1,fp8,fp8,0,1.4057493209838867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,float16,0,5.517018636067708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,0,1,fp8,fp8,0,4.774570782979329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,float16,0,1.5453227361043294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,64,0,1,float16,fp8,0,5.519509633382161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,128,1,float16,fp8,0,1.5603200594584148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,128,1,fp8,fp8,0,1.419962724049886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,float16,0,1.5624106725056965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,float16,0,5.533578872680664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,0,1,float16,fp8,0,5.547173182169597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,64,0,1,fp8,fp8,0,4.788784027099609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,128,1,float16,fp8,0,1.5806667009989421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,128,1,fp8,fp8,0,1.4420426686604817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,float16,0,0.9123093287150065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,float16,0,5.56059201558431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,128,1,float16,fp8,0,0.9358879725138346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,128,1,fp8,fp8,0,0.8746506373087565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,float16,0,2.9611946741739907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,0,1,fp8,fp8,0,4.818479855855306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,64,0,1,float16,fp8,0,5.57798957824707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,float16,0,0.8157333532969157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,128,1,float16,fp8,0,0.8248373667399088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,0,1,fp8,fp8,0,2.5942932764689126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,64,0,1,float16,fp8,0,2.9893919626871743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,128,1,fp8,fp8,0,0.7515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,float16,0,2.827712059020996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,float16,0,0.8182186285654703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,0,1,fp8,fp8,0,2.4614079793294272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,128,1,float16,fp8,0,0.8273226420084635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,64,0,1,float16,fp8,0,2.8386561075846353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,float16,0,2.8357067108154297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,128,1,fp8,fp8,0,0.7583093643188477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,float16,0,0.8258773485819498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,0,1,fp8,fp8,0,2.462165355682373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,128,1,float16,fp8,0,0.8362027009328207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,64,0,1,float16,fp8,0,2.8460747400919595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,128,1,fp8,fp8,0,0.7675893306732178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,float16,0,2.8531786600748696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,float16,0,0.5080053408940634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,128,1,float16,fp8,0,0.5217599868774414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,float16,0,1.5596213340759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,0,1,fp8,fp8,0,2.478618621826172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,64,0,1,float16,fp8,0,2.863178571065267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,128,1,fp8,fp8,0,0.4944800138473511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,float16,0,0.45972267786661786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,0,1,fp8,fp8,0,1.3767679532368977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,128,1,float16,fp8,0,0.4639626741409302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,64,0,1,float16,fp8,0,1.5737600326538086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,float16,0,1.489674727121989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,128,1,fp8,fp8,0,0.4300853411356608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,float16,0,0.46404266357421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,0,1,fp8,fp8,0,1.3058826923370361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,64,0,1,float16,fp8,0,1.4953707059224446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,128,1,float16,fp8,0,0.46825599670410156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,128,1,fp8,fp8,0,0.43277867635091144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,float16,0,1.4979732831319172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,float16,0,0.467957337697347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,0,1,fp8,fp8,0,1.3068853219350178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,64,0,1,float16,fp8,0,1.5016853014628093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,128,1,float16,fp8,0,0.473632017771403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,128,1,fp8,fp8,0,0.4379306634267171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,float16,0,1.5046720504760742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,float16,0,0.35313065846761066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,0,1,float16,fp8,0,1.5098986625671387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,128,1,float16,fp8,0,0.35337599118550617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,64,0,1,fp8,fp8,0,1.3162346680959065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,float16,0,0.9101386864980062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,128,1,fp8,fp8,0,0.3343520164489746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,float16,0,0.35542933146158856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,0,1,fp8,fp8,0,0.8014506498972574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,64,0,1,float16,fp8,0,0.9065386454264323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,128,1,float16,fp8,0,0.3555519978205363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,float16,0,0.8960106372833252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,128,1,fp8,fp8,0,0.3325759967168172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,float16,0,0.3540693521499634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,0,1,float16,fp8,0,0.893829345703125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,64,0,1,fp8,fp8,0,0.7932799657185873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,float16,0,0.8958880106608073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,128,1,float16,fp8,0,0.3547946612040202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,128,1,fp8,fp8,0,0.3325066765149434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,float16,0,0.3550186554590861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,0,1,float16,fp8,0,0.8978559970855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,64,0,1,fp8,fp8,0,0.7954933643341064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,float16,0,0.8955946763356527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,128,1,float16,fp8,0,0.3551359971364339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,128,1,fp8,fp8,0,0.3348906834920247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,0,1,float16,fp8,0,0.8998560110727946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,64,0,1,fp8,fp8,0,0.7935733000437418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,float16,0,2.4202826817830405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,128,1,fp8,fp8,0,2.2203787167867026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,128,1,float16,fp8,0,2.439664045969645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,float16,0,2.439056078592936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,float16,0,7.4124908447265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,0,1,fp8,fp8,0,6.436037063598633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,64,0,1,float16,fp8,0,7.46234130859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,128,1,float16,fp8,0,2.4626399676005044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,128,1,fp8,fp8,0,2.242565313975016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,float16,0,7.461392084757487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,float16,0,2.4692373275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,0,1,float16,fp8,0,7.475423812866211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,64,0,1,fp8,fp8,0,6.459280014038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,128,1,float16,fp8,0,2.491861343383789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,128,1,fp8,fp8,0,2.2819199562072754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,float16,0,1.393130620320638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,float16,0,7.50486946105957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,128,1,float16,fp8,0,1.4249760309855144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,float16,0,3.9703734715779624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,0,1,float16,fp8,0,7.526458740234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,64,0,1,fp8,fp8,0,6.519381205240886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,128,1,fp8,fp8,0,1.3267839749654133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,float16,0,1.22707732518514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,128,1,float16,fp8,0,1.240063985188802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,0,1,fp8,fp8,0,3.483189264933268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,64,0,1,float16,fp8,0,4.001157442728679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,128,1,fp8,fp8,0,1.1265333493550618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,float16,0,3.7519734700520835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,float16,0,1.235152006149292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,0,1,float16,fp8,0,3.761967976888021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,64,0,1,fp8,fp8,0,3.2587305704752603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,128,1,float16,fp8,0,1.2480746905008953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,float16,0,3.7734400431315103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,128,1,fp8,fp8,0,1.1353546778361003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,float16,0,1.249562660853068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,0,1,fp8,fp8,0,3.2657225926717124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,64,0,1,float16,fp8,0,3.773146629333496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,128,1,float16,fp8,0,1.2645173072814941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,128,1,fp8,fp8,0,1.1534079710642497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,float16,0,3.7879467010498047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,float16,0,0.7333920001983643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,128,1,float16,fp8,0,0.7530773480733236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,float16,0,2.0422560373942056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,0,1,fp8,fp8,0,3.2882668177286782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,128,1,fp8,fp8,0,0.7025547027587891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,64,0,1,float16,fp8,0,3.7970507939656577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,float16,0,0.6544693311055502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,0,1,fp8,fp8,0,1.8033812840779622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,128,1,float16,fp8,0,0.6621333360671997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,64,0,1,float16,fp8,0,2.062122662862142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,float16,0,1.9324639638264973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,128,1,fp8,fp8,0,0.6061013142267863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,float16,0,0.657093326250712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,0,1,fp8,fp8,0,1.6875680287679036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,64,0,1,float16,fp8,0,1.9415253003438313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,128,1,float16,fp8,0,0.6657546758651733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,128,1,fp8,fp8,0,0.6094719966252645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,float16,0,1.9424586296081543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,float16,0,0.6650240023930868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,0,1,fp8,fp8,0,1.6941866874694824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,64,0,1,float16,fp8,0,1.9507253964742024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,128,1,float16,fp8,0,0.6729333400726318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,float16,0,1.9536159833272297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,128,1,fp8,fp8,0,0.6202826499938965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,float16,0,0.41068267822265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,128,1,float16,fp8,0,0.42286932468414307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,0,1,float16,fp8,0,1.9643360773722331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,float16,0,1.085957368214925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,64,0,1,fp8,fp8,0,1.7001652717590332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,128,1,fp8,fp8,0,0.399344007174174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,float16,0,0.37161068121592206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,0,1,fp8,fp8,0,0.966661294301351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,128,1,float16,fp8,0,0.37433067957560223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,64,0,1,float16,fp8,0,1.0949066480000813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,float16,0,1.031333367029826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,128,1,fp8,fp8,0,0.3495839834213257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,float16,0,0.37413867314656574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,0,1,fp8,fp8,0,0.9065386454264323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,64,0,1,float16,fp8,0,1.0321919918060303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,128,1,float16,fp8,0,0.3779093424479167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,128,1,fp8,fp8,0,0.35096001625061035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,float16,0,1.0359040101369221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,float16,0,0.37830400466918945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,0,1,float16,fp8,0,1.0380266507466633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,64,0,1,fp8,fp8,0,0.9108373324076334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,128,1,float16,fp8,0,0.3819679816563924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,128,1,fp8,fp8,0,0.3554133176803589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,float16,0,1.0413333574930828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,float16,0,0.28807999690373737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,0,1,float16,fp8,0,1.0470560391743977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,float16,0,0.6487040122350057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,64,0,1,fp8,fp8,0,0.9146613279978434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,128,1,float16,fp8,0,0.28782399495442706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,128,1,fp8,fp8,0,0.27322665850321454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,float16,0,0.2876533269882202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,0,1,fp8,fp8,0,0.5733973185221354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,64,0,1,float16,fp8,0,0.6483466625213623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,128,1,float16,fp8,0,0.28777599334716797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,float16,0,0.6355679829915365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,128,1,fp8,fp8,0,0.2712639967600505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,0,1,float16,fp8,0,0.6377066771189371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,float16,0,0.2877013285954793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,64,0,1,fp8,fp8,0,0.5666026671727499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,float16,0,0.6378719806671143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,128,1,fp8,fp8,0,0.2713279922803243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,128,1,float16,fp8,0,0.2895573377609253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,0,1,float16,fp8,0,0.6378026803334554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,float16,0,0.2889440059661865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,64,0,1,fp8,fp8,0,0.5680480003356934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,128,1,float16,fp8,0,0.2890346646308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,float16,0,0.6389173269271851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,128,1,fp8,fp8,0,0.27297600110371906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,0,1,float16,fp8,0,0.6382186810175577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,64,0,1,fp8,fp8,0,0.5690720081329346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,float16,0,1.7933972676595051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,128,1,float16,fp8,0,1.8110666275024414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,128,1,fp8,fp8,0,1.64029328028361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,float16,0,4.539274533589681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,float16,0,1.80514129002889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,0,1,float16,fp8,0,4.557509422302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,64,0,1,fp8,fp8,0,3.958458582560221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,128,1,fp8,fp8,0,1.6588427225748699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,128,1,float16,fp8,0,1.8213599522908528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,float16,0,4.567642529805501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,float16,0,1.8260639508565266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,0,1,float16,fp8,0,4.58791987101237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,64,0,1,fp8,fp8,0,3.9809707005818686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,128,1,float16,fp8,0,1.843839963277181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,128,1,fp8,fp8,0,1.685765266418457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,float16,0,1.0472959677378337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,float16,0,4.6002505620320635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,128,1,float16,fp8,0,1.0709546407063801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,0,1,fp8,fp8,0,4.005029360453288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,64,0,1,float16,fp8,0,4.6124693552653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,float16,0,2.4786292711893716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,128,1,fp8,fp8,0,0.9965600172678629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,float16,0,0.9234613577524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,0,1,float16,fp8,0,2.5018720626831055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,64,0,1,fp8,fp8,0,2.1963094075520835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,128,1,float16,fp8,0,0.9327519734700521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,float16,0,2.3166186014811196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,128,1,fp8,fp8,0,0.8477760155995687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,float16,0,0.9312746524810791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,0,1,fp8,fp8,0,2.0209439595540366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,64,0,1,float16,fp8,0,2.321925322214762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,128,1,float16,fp8,0,0.9408000310262045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,float16,0,2.3222400347391763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,128,1,fp8,fp8,0,0.8552693525950114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,float16,0,0.9408373037974039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,0,1,fp8,fp8,0,2.0293493270874023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,64,0,1,float16,fp8,0,2.3331519762674966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,128,1,float16,fp8,0,0.9504746596018473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,float16,0,2.3413492838541665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,128,1,fp8,fp8,0,0.8685812950134277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,float16,0,0.5551253159840902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,128,1,float16,fp8,0,0.5684906641642252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,float16,0,1.286234696706136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,0,1,float16,fp8,0,2.3505226771036782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,64,0,1,fp8,fp8,0,2.0416266123453775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,128,1,fp8,fp8,0,0.5314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,float16,0,0.4947520097096761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,0,1,fp8,fp8,0,1.1463306744893391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,64,0,1,float16,fp8,0,1.3011360168457031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,128,1,float16,fp8,0,0.49908268451690674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,float16,0,1.2044533093770344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,128,1,fp8,fp8,0,0.45902399222056073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,float16,0,0.4987573226292928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,0,1,fp8,fp8,0,1.0584426720937092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,64,0,1,float16,fp8,0,1.2109866937001545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,128,1,float16,fp8,0,0.5049279928207397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,128,1,fp8,fp8,0,0.46349867184956867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,float16,0,1.2116693655649822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,float16,0,0.5039573510487875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,0,1,float16,fp8,0,1.216858704884847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,64,0,1,fp8,fp8,0,1.0634559790293376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,128,1,fp8,fp8,0,0.47018667062123615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,128,1,float16,fp8,0,0.5092480182647705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,float16,0,1.2209493319193523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,float16,0,0.3144426743189494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,float16,0,0.6949600378672282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,128,1,float16,fp8,0,0.3232373396555583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,0,1,float16,fp8,0,1.2259999910990398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,64,0,1,fp8,fp8,0,1.0717066923777263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,128,1,fp8,fp8,0,0.3042080005009969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,float16,0,0.28010666370391846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,0,1,float16,fp8,0,0.7030026912689209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,64,0,1,fp8,fp8,0,0.6238079865773519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,128,1,float16,fp8,0,0.28356800476710003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,float16,0,0.649893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,128,1,fp8,fp8,0,0.2662506699562073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,0,1,float16,fp8,0,0.6518346468607584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,float16,0,0.2835200031598409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,64,0,1,fp8,fp8,0,0.5786879857381185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,float16,0,0.6514986753463745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,128,1,float16,fp8,0,0.2858133316040039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,128,1,fp8,fp8,0,0.267301340897878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,float16,0,0.28784533341725665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,0,1,float16,fp8,0,0.6569759845733643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,64,0,1,fp8,fp8,0,0.5806026856104533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,float16,0,0.65774933497111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,128,1,float16,fp8,0,0.2916746735572815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,128,1,fp8,fp8,0,0.2711306611696879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,float16,0,0.22075732549031576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,0,1,float16,fp8,0,0.6611733436584473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,float16,0,0.4280800024668376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,64,0,1,fp8,fp8,0,0.58460799853007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,128,1,float16,fp8,0,0.22203733523686728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,128,1,fp8,fp8,0,0.20768000682195029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,0,1,float16,fp8,0,0.42795733610788983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,float16,0,0.21808532873789468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,64,0,1,fp8,fp8,0,0.3822293281555176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,128,1,float16,fp8,0,0.2181439995765686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,float16,0,0.4187573194503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,128,1,fp8,fp8,0,0.20770132541656494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,0,1,float16,fp8,0,0.418778657913208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,float16,0,0.21948800484339395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,64,0,1,fp8,fp8,0,0.3755679925282796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,128,1,float16,fp8,0,0.21935999393463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,128,1,fp8,fp8,0,0.20611733198165894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,float16,0,0.41911999384562176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,float16,0,0.21864533424377441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,0,1,fp8,fp8,0,0.375983993212382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,64,0,1,float16,fp8,0,0.4190880060195923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,128,1,float16,fp8,0,0.21913599967956543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,128,1,fp8,fp8,0,0.20756266514460245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,float16,0,0.42049066225687665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,0,1,float16,fp8,0,0.4203733205795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,64,0,1,fp8,fp8,0,0.3770773410797119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,float16,0,2.380256017049154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,128,1,float16,fp8,0,2.401514689127604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,128,1,fp8,fp8,0,2.1812960306803384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,float16,0,4.6855894724528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,float16,0,2.397712071736654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,0,1,float16,fp8,0,4.70684814453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,64,0,1,fp8,fp8,0,4.113786697387695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,float16,0,4.712954521179199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,128,1,float16,fp8,0,2.4181973139444985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,128,1,fp8,fp8,0,2.2040106455485025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,float16,0,2.4319307009379068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,0,1,float16,fp8,0,4.726367950439453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,64,0,1,fp8,fp8,0,4.1436052322387695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,float16,0,4.762303988138835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,128,1,float16,fp8,0,2.4514293670654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,128,1,fp8,fp8,0,2.23964262008667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,float16,0,1.3554293314615886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,128,1,float16,fp8,0,1.3852640787760417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,float16,0,2.5915466944376626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,0,1,float16,fp8,0,4.779706637064616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,64,0,1,fp8,fp8,0,4.188053448994954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,128,1,fp8,fp8,0,1.2896959781646729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,float16,0,1.194426695505778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,0,1,float16,fp8,0,2.619861284891764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,64,0,1,fp8,fp8,0,2.3031999270121255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,128,1,float16,fp8,0,1.2050506273905437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,float16,0,2.3541547457377114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,128,1,fp8,fp8,0,1.0881760120391846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,float16,0,1.1993707021077473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,0,1,float16,fp8,0,2.3657387097676597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,64,0,1,fp8,fp8,0,2.067216078440348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,float16,0,2.3632532755533853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,128,1,fp8,fp8,0,1.0999200344085693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,128,1,float16,fp8,0,1.2128640015920003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,float16,0,1.2150239944458008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,0,1,float16,fp8,0,2.380154609680176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,64,0,1,fp8,fp8,0,2.0765226682027182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,float16,0,2.3859519958496094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,128,1,float16,fp8,0,1.2285119692484539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,128,1,fp8,fp8,0,1.1174240112304688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,float16,0,0.7016692956288656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,128,1,float16,fp8,0,0.7187360127766927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,0,1,float16,fp8,0,2.397775967915853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,64,0,1,fp8,fp8,0,2.0987733205159507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,float16,0,1.3299307028452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,128,1,fp8,fp8,0,0.6690133412679037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,float16,0,0.6205546855926514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,0,1,fp8,fp8,0,1.184048016866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,64,0,1,float16,fp8,0,1.3448692957560222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,128,1,float16,fp8,0,0.6266560157140096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,float16,0,1.2113653024037678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,128,1,fp8,fp8,0,0.571669340133667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,0,1,float16,fp8,0,1.2158453464508057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,float16,0,0.6242666641871134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,64,0,1,fp8,fp8,0,1.0699040095011394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,float16,0,1.2180853684743245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,128,1,float16,fp8,0,0.6303893327713013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,128,1,fp8,fp8,0,0.5761546691258749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,float16,0,0.6317653258641561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,0,1,float16,fp8,0,1.225098689397176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,64,0,1,fp8,fp8,0,1.074442704518636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,float16,0,1.2278453509012859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,128,1,float16,fp8,0,0.6396640141805013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,128,1,fp8,fp8,0,0.5848426818847656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,float16,0,0.37582401434580487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,0,1,float16,fp8,0,1.2360906600952148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,float16,0,0.6986453533172607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,64,0,1,fp8,fp8,0,1.0855627059936523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,128,1,float16,fp8,0,0.38607466220855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,128,1,fp8,fp8,0,0.3617013295491536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,float16,0,0.33293332656224567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,0,1,float16,fp8,0,0.7097866535186768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,64,0,1,fp8,fp8,0,0.6274346510569254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,128,1,float16,fp8,0,0.33694398403167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,float16,0,0.6377013524373373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,128,1,fp8,fp8,0,0.31324267387390137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,0,1,float16,fp8,0,0.6423786481221517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,float16,0,0.3365226586659749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,64,0,1,fp8,fp8,0,0.5686879952748617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,float16,0,0.6426560084025065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,128,1,float16,fp8,0,0.3407520055770874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,128,1,fp8,fp8,0,0.31616532802581787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,0,1,float16,fp8,0,0.6452213525772095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,float16,0,0.3410773277282715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,64,0,1,fp8,fp8,0,0.5715893507003784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,128,1,float16,fp8,0,0.34572800000508624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,128,1,fp8,fp8,0,0.3205546736717224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,float16,0,0.6478986740112305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,float16,0,0.21594667434692383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,0,1,float16,fp8,0,0.6520320177078247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,64,0,1,fp8,fp8,0,0.5780266523361206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,float16,0,0.38516799608866376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,128,1,float16,fp8,0,0.22047466039657593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,128,1,fp8,fp8,0,0.20946667591730753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,float16,0,0.18958399693171182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,0,1,fp8,fp8,0,0.3491946856180827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,64,0,1,float16,fp8,0,0.3908960024515788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,128,1,float16,fp8,0,0.192138671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,128,1,fp8,fp8,0,0.18287465969721475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,float16,0,0.34933332602183026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,float16,0,0.1907306710879008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,0,1,fp8,fp8,0,0.3181706666946411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,64,0,1,float16,fp8,0,0.35316268603007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,128,1,float16,fp8,0,0.19420800606409708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,128,1,fp8,fp8,0,0.1840320030848185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,float16,0,0.3524906635284424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,float16,0,0.19499200582504272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,0,1,fp8,fp8,0,0.32024532556533813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,64,0,1,float16,fp8,0,0.35578668117523193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,128,1,float16,fp8,0,0.1955733299255371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,128,1,fp8,fp8,0,0.18711467583974203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,float16,0,0.3556693394978841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,float16,0,0.1546293298403422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,0,1,fp8,fp8,0,0.3242666721343994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,64,0,1,float16,fp8,0,0.35841067632039386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,float16,0,0.2487199902534485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,128,1,float16,fp8,0,0.15431466698646545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,128,1,fp8,fp8,0,0.1460533340771993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,float16,0,0.1524853308995565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,0,1,fp8,fp8,0,0.22415467103322348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,64,0,1,float16,fp8,0,0.24751466512680054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,128,1,float16,fp8,0,0.15282133221626282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,128,1,fp8,fp8,0,0.14436266819636026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,float16,0,0.24216532707214355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,float16,0,0.15262400110562643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,0,1,fp8,fp8,0,0.2182613412539164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,64,0,1,float16,fp8,0,0.24279999732971191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,128,1,float16,fp8,0,0.15253333250681558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,128,1,fp8,fp8,0,0.14422399799029031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,float16,0,0.24054932594299316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,float16,0,0.15268799662590027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,0,1,fp8,fp8,0,0.21801066398620605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,float16,0,0.24123199780782065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,64,0,1,float16,fp8,0,0.243776003519694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,128,1,float16,fp8,0,0.15243732929229736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,128,1,fp8,fp8,0,0.14404799540837607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,0,1,float16,fp8,0,0.24251733223597208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,64,0,1,fp8,fp8,0,0.21902400255203247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,float16,0,1.7633172671000164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,128,1,float16,fp8,0,1.7802027066548665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,128,1,fp8,fp8,0,1.6121652921040852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,float16,0,2.984623908996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,float16,0,1.7757493654886882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,0,1,fp8,fp8,0,2.6353440284729004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,64,0,1,float16,fp8,0,2.995135943094889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,float16,0,3.001445452372233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,128,1,float16,fp8,0,1.7914826075236003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,128,1,fp8,fp8,0,1.628933270772298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,0,1,float16,fp8,0,3.0137812296549478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,float16,0,1.8005760510762532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,64,0,1,fp8,fp8,0,2.654714743296305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,float16,0,3.030735969543457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,128,1,float16,fp8,0,1.815440018971761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,128,1,fp8,fp8,0,1.6561439832051594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,float16,0,1.018122673034668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,0,1,float16,fp8,0,3.048207918802897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,float16,0,1.674202601114909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,64,0,1,fp8,fp8,0,2.6845601399739585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,128,1,float16,fp8,0,1.0391519864400227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,128,1,fp8,fp8,0,0.9672586917877197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,float16,0,0.8961813449859619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,0,1,float16,fp8,0,1.6971413294474285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,64,0,1,fp8,fp8,0,1.5099892616271973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,128,1,float16,fp8,0,0.906218687693278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,float16,0,1.5153387387593586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,128,1,fp8,fp8,0,0.8210986455281576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,0,1,float16,fp8,0,1.523146629333496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,float16,0,0.9027146498362223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,64,0,1,fp8,fp8,0,1.3379626274108887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,float16,0,1.5234773953755696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,128,1,float16,fp8,0,0.9129227002461752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,128,1,fp8,fp8,0,0.8284800052642822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,0,1,float16,fp8,0,1.5304479598999023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,float16,0,0.9143573443094889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,64,0,1,fp8,fp8,0,1.3485760688781738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,float16,0,1.5381919542948406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,128,1,float16,fp8,0,0.9248960018157959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,128,1,fp8,fp8,0,0.8424320220947266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,float16,0,0.5298933188120524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,float16,0,0.8657973607381185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,0,1,float16,fp8,0,1.5489706993103027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,64,0,1,fp8,fp8,0,1.3619306882222493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,128,1,float16,fp8,0,0.5434399843215942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,128,1,fp8,fp8,0,0.5070399840672811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,0,1,float16,fp8,0,0.8780053456624349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,64,0,1,fp8,fp8,0,0.7831146717071533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,float16,0,0.46935999393463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,128,1,float16,fp8,0,0.47280001640319824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,float16,0,0.7848320007324219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,128,1,fp8,fp8,0,0.4339093367258708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,float16,0,0.47248534361521405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,0,1,float16,fp8,0,0.788693348566691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,64,0,1,fp8,fp8,0,0.699023962020874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,128,1,float16,fp8,0,0.4779733419418335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,128,1,fp8,fp8,0,0.4376426537831624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,float16,0,0.7900906403859457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,0,1,fp8,fp8,0,0.703333298365275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,64,0,1,float16,fp8,0,0.7928000291188558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,float16,0,0.4788320064544678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,128,1,float16,fp8,0,0.4834666649500529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,128,1,fp8,fp8,0,0.4437280098597209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,float16,0,0.7958079973856608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,float16,0,0.286080002784729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,0,1,fp8,fp8,0,0.7100266615549723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,64,0,1,float16,fp8,0,0.8027093410491943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,float16,0,0.46165335178375244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,128,1,float16,fp8,0,0.2940640052159627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,128,1,fp8,fp8,0,0.27716267108917236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,float16,0,0.2532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,0,1,float16,fp8,0,0.4689653317133586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,64,0,1,fp8,fp8,0,0.41954131921132404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,128,1,float16,fp8,0,0.256549338499705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,float16,0,0.416592001914978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,128,1,fp8,fp8,0,0.23919999599456787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,0,1,float16,fp8,0,0.4177173376083374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,float16,0,0.2548159956932068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,64,0,1,fp8,fp8,0,0.37749334176381427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,float16,0,0.42105599244435626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,128,1,float16,fp8,0,0.25733333826065063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,128,1,fp8,fp8,0,0.24052266279856363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,0,1,float16,fp8,0,0.4222186803817749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,float16,0,0.260042667388916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,64,0,1,fp8,fp8,0,0.3807679812113444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,float16,0,0.4246026674906413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,128,1,float16,fp8,0,0.26286399364471436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,128,1,fp8,fp8,0,0.24540799856185913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,float16,0,0.16473066806793213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,0,1,float16,fp8,0,0.42929601669311523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,64,0,1,fp8,fp8,0,0.38413333892822266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,float16,0,0.2595466574033101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,128,1,float16,fp8,0,0.16897066434224448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,128,1,fp8,fp8,0,0.1611840029557546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,0,1,float16,fp8,0,0.2637386719385783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,float16,0,0.14429333806037903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,64,0,1,fp8,fp8,0,0.2390399972597758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,float16,0,0.23447465896606445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,128,1,float16,fp8,0,0.14631999532381693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,128,1,fp8,fp8,0,0.13610133528709412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,0,1,float16,fp8,0,0.23477333784103394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,64,0,1,fp8,fp8,0,0.2101866602897644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,float16,0,0.1444586714108785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,float16,0,0.234442671140035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,128,1,float16,fp8,0,0.14685333768526712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,128,1,fp8,fp8,0,0.13797332843144736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,0,1,float16,fp8,0,0.23677333196004233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,64,0,1,fp8,fp8,0,0.2121653358141581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,float16,0,0.14672000209490457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,float16,0,0.23676800727844238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,128,1,float16,fp8,0,0.14845866958300272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,128,1,fp8,fp8,0,0.14219199617703757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,0,1,float16,fp8,0,0.238864004611969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,float16,0,0.11652800440788269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,64,0,1,fp8,fp8,0,0.2180266578992208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,float16,0,0.17045867443084717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,128,1,fp8,fp8,0,0.11270933349927266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,128,1,float16,fp8,0,0.11592533191045125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,0,1,float16,fp8,0,0.17043733596801758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,64,0,1,fp8,fp8,0,0.15804266929626465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,float16,0,0.11563733220100403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,128,1,float16,fp8,0,0.11557867129643758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,float16,0,0.16850133736928305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,128,1,fp8,fp8,0,0.10962133606274922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,0,1,float16,fp8,0,0.16861865917841592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,64,0,1,fp8,fp8,0,0.15380799770355225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,float16,0,0.11601600050926208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,float16,0,0.16873067617416382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,128,1,float16,fp8,0,0.1153600017229716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,128,1,fp8,fp8,0,0.10925333698590596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,0,1,fp8,fp8,0,0.15436800320943198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,64,0,1,float16,fp8,0,0.16995733976364136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,float16,0,0.11550399661064148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,float16,0,0.16861865917841592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,128,1,float16,fp8,0,0.11548266808191936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,128,1,fp8,fp8,0,0.109525332848231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,0,1,float16,fp8,0,0.1686240037282308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,64,0,1,fp8,fp8,0,0.15390400091807047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,float16,0,2.401989301045736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,128,1,fp8,fp8,0,2.146176020304362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,128,1,float16,fp8,0,2.4028372764587402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,float16,0,3.349775950113932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,float16,0,2.4144585927327475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,0,1,fp8,fp8,0,2.9293813705444336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,64,0,1,float16,fp8,0,3.3431307474772134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,128,1,float16,fp8,0,2.4115146001180015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,128,1,fp8,fp8,0,2.1672213872273765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,float16,0,3.3554986317952475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,0,1,fp8,fp8,0,2.9556585947672525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,float16,0,2.504997412363688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,64,0,1,float16,fp8,0,3.3615732192993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,128,1,fp8,fp8,0,2.195178667704264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,128,1,float16,fp8,0,2.4985119501749673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,float16,0,3.460693359375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,0,1,float16,fp8,0,3.461573282877604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,float16,0,1.3407626152038574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,float16,0,1.8588852882385254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,128,1,float16,fp8,0,1.364554723103841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,64,0,1,fp8,fp8,0,2.9864800771077475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,128,1,fp8,fp8,0,1.2704799969991047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,float16,0,1.1745866934458415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,0,1,fp8,fp8,0,1.7001813252766926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,64,0,1,float16,fp8,0,1.8835412661234539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,float16,0,1.651290734608968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,128,1,float16,fp8,0,1.186085303624471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,128,1,fp8,fp8,0,1.072160005569458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,0,1,fp8,fp8,0,1.4685707092285156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,64,0,1,float16,fp8,0,1.66321595509847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,float16,0,1.185754696528117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,float16,0,1.66267728805542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,128,1,float16,fp8,0,1.1966453393300374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,128,1,fp8,fp8,0,1.0820586681365967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,0,1,float16,fp8,0,1.6733867327372234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,float16,0,1.1991679668426514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,64,0,1,fp8,fp8,0,1.4795254071553547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,float16,0,1.6822452545166016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,128,1,float16,fp8,0,1.2101653416951497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,128,1,fp8,fp8,0,1.0998986562093098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,0,1,float16,fp8,0,1.6940107345581055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,float16,0,0.6838613351186117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,64,0,1,fp8,fp8,0,1.5019466082255046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,float16,0,0.9480106830596924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,128,1,float16,fp8,0,0.7002560297648112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,128,1,fp8,fp8,0,0.6518719991048177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,0,1,fp8,fp8,0,0.8703839778900146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,float16,0,0.602949341138204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,64,0,1,float16,fp8,0,0.9638400077819824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,float16,0,0.8463892936706543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,128,1,fp8,fp8,0,0.5518933137257894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,128,1,float16,fp8,0,0.611135999361674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,0,1,float16,fp8,0,0.8527572949727377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,64,0,1,fp8,fp8,0,0.7559946378072103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,float16,0,0.6078986724217733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,float16,0,0.8517706394195557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,128,1,fp8,fp8,0,0.5594026645024618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,128,1,float16,fp8,0,0.6154400110244751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,0,1,float16,fp8,0,0.857647975285848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,64,0,1,fp8,fp8,0,0.7628959814707438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,float16,0,0.6160693168640137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,float16,0,0.8609066804250082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,128,1,float16,fp8,0,0.6234293381373087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,128,1,fp8,fp8,0,0.5666720072428385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,0,1,float16,fp8,0,0.8678879737854004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,float16,0,0.3595573504765828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,64,0,1,fp8,fp8,0,0.7706133524576823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,float16,0,0.49534400304158527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,128,1,float16,fp8,0,0.3693600098292033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,128,1,fp8,fp8,0,0.3442346652348836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,0,1,fp8,fp8,0,0.45746668179829914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,64,0,1,float16,fp8,0,0.5046399831771851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,float16,0,0.3163040081659953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,float16,0,0.4397706588109334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,128,1,float16,fp8,0,0.31995733578999835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,128,1,fp8,fp8,0,0.2953493396441142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,0,1,float16,fp8,0,0.4428693453470866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,64,0,1,fp8,fp8,0,0.40035200119018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,float16,0,0.31959466139475506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,128,1,float16,fp8,0,0.32257066170374554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,float16,0,0.4461119969685872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,128,1,fp8,fp8,0,0.2990293304125468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,0,1,float16,fp8,0,0.44777599970499676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,float16,0,0.324565331141154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,64,0,1,fp8,fp8,0,0.4028533299763997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,float16,0,0.4506719907124837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,128,1,float16,fp8,0,0.328874667485555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,128,1,fp8,fp8,0,0.30262933174769086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,0,1,float16,fp8,0,0.4543573458989461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,64,0,1,fp8,fp8,0,0.4080053170522054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,float16,0,0.19750932852427164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,float16,0,0.2696693340937297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,128,1,float16,fp8,0,0.2039626638094584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,128,1,fp8,fp8,0,0.19056000312169394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,0,1,float16,fp8,0,0.2751573324203491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,float16,0,0.17062399784723917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,64,0,1,fp8,fp8,0,0.24953599770863852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,float16,0,0.23477866252263388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,128,1,float16,fp8,0,0.17359999815622965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,128,1,fp8,fp8,0,0.16274666786193848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,float16,0,0.17193067073822021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,0,1,fp8,fp8,0,0.21799999475479126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,64,0,1,float16,fp8,0,0.23759466409683228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,float16,0,0.23883734146753946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,128,1,fp8,fp8,0,0.16461333632469177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,128,1,float16,fp8,0,0.1744800011316935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,0,1,float16,fp8,0,0.2409600019454956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,float16,0,0.17665066321690878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,64,0,1,fp8,fp8,0,0.22210667530695596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,float16,0,0.24196799596150717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,128,1,float16,fp8,0,0.17865065733591715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,128,1,fp8,fp8,0,0.16929600636164346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,0,1,float16,fp8,0,0.24382400512695312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,float16,0,0.11553600430488586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,64,0,1,fp8,fp8,0,0.22472532590230307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,128,1,float16,fp8,0,0.11839999755223592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,128,1,fp8,fp8,0,0.11375466982523601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,float16,0,0.1544319987297058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,0,1,float16,fp8,0,0.15659733613332114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,64,0,1,fp8,fp8,0,0.14569066961606345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,float16,0,0.10167466600735982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,float16,0,0.13822399576505026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,128,1,float16,fp8,0,0.10272000233332317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,128,1,fp8,fp8,0,0.0938933293024699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,0,1,float16,fp8,0,0.14036800463994345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,64,0,1,fp8,fp8,0,0.126202662785848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,float16,0,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,float16,0,0.14028267065684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,128,1,float16,fp8,0,0.1032426655292511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,128,1,fp8,fp8,0,0.095551997423172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,0,1,float16,fp8,0,0.1402346690495809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,64,0,1,fp8,fp8,0,0.12602133552233377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,float16,0,0.10403733452161153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,float16,0,0.13896532853444418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,128,1,float16,fp8,0,0.10544000069300334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,128,1,fp8,fp8,0,0.09731733798980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,0,1,fp8,fp8,0,0.12775466839472452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,64,0,1,float16,fp8,0,0.14087466398874918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,float16,0,0.10733866691589355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,float16,0,0.08310933411121368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,128,1,float16,fp8,0,0.08309333523114522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,128,1,fp8,fp8,0,0.07877333462238312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,0,1,fp8,fp8,0,0.09924800197283427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,64,0,1,float16,fp8,0,0.10780266920725505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,float16,0,0.08301866551240285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,float16,0,0.1074666678905487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,128,1,float16,fp8,0,0.08341866731643677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,128,1,fp8,fp8,0,0.07876266539096832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,0,1,float16,fp8,0,0.10762666662534077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,64,0,1,fp8,fp8,0,0.09776000181833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,float16,0,0.08448533217112224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,float16,0,0.10739733775456746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,128,1,float16,fp8,0,0.08361066381136577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,128,1,fp8,fp8,0,0.0790293316046397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,0,1,float16,fp8,0,0.10719466209411621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,64,0,1,fp8,fp8,0,0.09974400202433269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,float16,0,0.08302933474381764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,float16,0,0.10749333103497823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,128,1,fp8,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,128,1,float16,fp8,0,0.08334933718045552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,0,1,float16,fp8,0,0.10742933551470439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,64,0,1,fp8,fp8,0,0.09743466973304749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,float16,0,1.7710132598876953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,128,1,float16,fp8,0,1.7744639714558919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,float16,0,2.235429286956787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,128,1,fp8,fp8,0,1.6021973292032878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,0,1,fp8,fp8,0,1.9769973754882812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,64,0,1,float16,fp8,0,2.237552007039388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,float16,0,1.786293347676595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,float16,0,2.249178727467855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,128,1,fp8,fp8,0,1.6139893531799316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,128,1,float16,fp8,0,1.7875253359476726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,0,1,float16,fp8,0,2.2561440467834473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,64,0,1,fp8,fp8,0,1.9957599639892578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,float16,0,1.838314692179362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,float16,0,2.306543986002604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,128,1,float16,fp8,0,1.8166346549987793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,128,1,fp8,fp8,0,1.642533302307129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,float16,0,1.0088533560434978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,0,1,float16,fp8,0,2.2854347229003906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,64,0,1,fp8,fp8,0,2.0219039916992188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,128,1,float16,fp8,0,1.025818665822347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,float16,0,1.2704746723175049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,128,1,fp8,fp8,0,0.9550346533457438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,0,1,float16,fp8,0,1.2854506969451904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,float16,0,0.8846666812896729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,64,0,1,fp8,fp8,0,1.165509303410848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,float16,0,1.1158826351165771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,128,1,fp8,fp8,0,0.8079360326131185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,128,1,float16,fp8,0,0.8923679987589518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,0,1,float16,fp8,0,1.1249120235443115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,64,0,1,fp8,fp8,0,0.9981813430786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,float16,0,0.8897120157877604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,float16,0,1.1234880288441975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,128,1,float16,fp8,0,0.898693323135376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,128,1,fp8,fp8,0,0.8140906492869059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,0,1,float16,fp8,0,1.1335999965667725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,64,0,1,fp8,fp8,0,1.0081013043721516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,float16,0,0.9009706974029541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,float16,0,1.1383146444956462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,128,1,fp8,fp8,0,0.8276906808217367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,128,1,float16,fp8,0,0.9123946825663248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,0,1,float16,fp8,0,1.1465173562367756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,float16,0,0.5178613265355428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,float16,0,0.6521759827931722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,128,1,float16,fp8,0,0.5289813280105591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,64,0,1,fp8,fp8,0,1.0221013228098552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,128,1,fp8,fp8,0,0.49380266666412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,0,1,float16,fp8,0,0.6613226731618246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,64,0,1,fp8,fp8,0,0.6021493275960287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,float16,0,0.45553600788116455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,float16,0,0.5740640163421631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,128,1,float16,fp8,0,0.4607626597086589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,128,1,fp8,fp8,0,0.4203893343607585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,0,1,float16,fp8,0,0.5785439809163412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,64,0,1,fp8,fp8,0,0.518336017926534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,float16,0,0.46004267533620197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,128,1,float16,fp8,0,0.463642676671346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,128,1,fp8,fp8,0,0.42371733983357746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,float16,0,0.5785173177719116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,float16,0,0.4657546679178874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,0,1,fp8,fp8,0,0.521727999051412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,64,0,1,float16,fp8,0,0.5835626522699991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,128,1,float16,fp8,0,0.4706079959869385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,128,1,fp8,fp8,0,0.43110934893290204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,float16,0,0.5858560005823771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,float16,0,0.2731626629829407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,0,1,float16,fp8,0,0.5914933284123739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,64,0,1,fp8,fp8,0,0.5300373236338297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,float16,0,0.3434186776479085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,128,1,float16,fp8,0,0.2810399929682414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,128,1,fp8,fp8,0,0.26289600133895874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,0,1,float16,fp8,0,0.35098667939503986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,float16,0,0.2376213272412618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,64,0,1,fp8,fp8,0,0.32054932912190753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,float16,0,0.2981226642926534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,128,1,fp8,fp8,0,0.22527466217676798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,128,1,float16,fp8,0,0.23970667521158853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,0,1,float16,fp8,0,0.30237332979838055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,64,0,1,fp8,fp8,0,0.2773653268814087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,float16,0,0.24075732628504434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,float16,0,0.3022986650466919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,128,1,fp8,fp8,0,0.22829333941141763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,128,1,float16,fp8,0,0.2435893416404724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,0,1,float16,fp8,0,0.3057386676470439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,64,0,1,fp8,fp8,0,0.27881066004435223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,float16,0,0.24539732933044434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,float16,0,0.3085813323656718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,128,1,float16,fp8,0,0.249071995417277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,128,1,fp8,fp8,0,0.23068799575169882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,0,1,float16,fp8,0,0.3115626573562622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,float16,0,0.1514026621977488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,64,0,1,fp8,fp8,0,0.28432534138361615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,float16,0,0.1893333395322164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,128,1,float16,fp8,0,0.15480533242225647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,128,1,fp8,fp8,0,0.14894400040308634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,0,1,fp8,fp8,0,0.17704000075658163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,64,0,1,float16,fp8,0,0.19377599159876505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,float16,0,0.1288266678651174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,float16,0,0.16332266728083292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,128,1,float16,fp8,0,0.1302880048751831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,128,1,fp8,fp8,0,0.12135466933250427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,0,1,float16,fp8,0,0.16564266880353293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,float16,0,0.12981866796811423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,64,0,1,fp8,fp8,0,0.15078933040301004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,float16,0,0.1636319955190023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,128,1,float16,fp8,0,0.13192533453305563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,128,1,fp8,fp8,0,0.12397866447766621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,0,1,float16,fp8,0,0.16614400347073874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,float16,0,0.13199999928474426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,64,0,1,fp8,fp8,0,0.1509866714477539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,float16,0,0.16659733653068542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,128,1,float16,fp8,0,0.1345866620540619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,128,1,fp8,fp8,0,0.1286133329073588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,0,1,float16,fp8,0,0.16820800304412842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,64,0,1,fp8,fp8,0,0.15662399927775064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,float16,0,0.10845333337783813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,float16,0,0.08603200316429138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,128,1,float16,fp8,0,0.08933333555857341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,128,1,fp8,fp8,0,0.08703999718030293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,0,1,float16,fp8,0,0.11128000418345134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,64,0,1,fp8,fp8,0,0.10588266452153523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,float16,0,0.0895146628220876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,float16,0,0.09896533687909444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,128,1,float16,fp8,0,0.07972800234953563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,0,1,float16,fp8,0,0.10124799609184265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,128,1,fp8,fp8,0,0.075162669022878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,64,0,1,fp8,fp8,0,0.09115733702977498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,float16,0,0.08021866778532664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,float16,0,0.0997226635615031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,128,1,fp8,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,128,1,float16,fp8,0,0.07922666768232982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,0,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,float16,0,0.07869866490364075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,64,0,1,fp8,fp8,0,0.09133866429328918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,float16,0,0.10000532865524292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,128,1,float16,fp8,0,0.08074133098125458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,128,1,fp8,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,0,1,float16,fp8,0,0.10157333811124165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,64,0,1,fp8,fp8,0,0.09143466750780742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,float16,0,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,float16,0,0.07986133297284444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,128,1,float16,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,128,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,0,1,float16,fp8,0,0.07981333136558533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,64,0,1,fp8,fp8,0,0.0737120012442271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,float16,0,0.06419200201829274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,128,1,float16,fp8,0,0.06436799963315327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,128,1,fp8,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,float16,0,0.08005333443482716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,0,1,float16,fp8,0,0.08074133098125458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,64,0,1,fp8,fp8,0,0.07403199871381123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,float16,0,0.0647626668214798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,128,1,float16,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,float16,0,0.08070399860541026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,128,1,fp8,fp8,0,0.06244266529877981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,0,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,64,0,1,fp8,fp8,0,0.07424533367156982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,float16,0,0.06483200192451477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,128,1,float16,fp8,0,0.06443733473618825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,float16,0,0.07914133369922638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,128,1,fp8,fp8,0,0.060746664802233376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,0,1,float16,fp8,0,0.0793333351612091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,64,0,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,float16,0,2.083183924357096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,128,1,float16,fp8,0,2.075157324473063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,float16,0,2.375114599863688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,128,1,fp8,fp8,0,2.0309653282165527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,0,1,float16,fp8,0,2.361957391103109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,64,0,1,fp8,fp8,0,2.2669013341267905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,float16,0,2.0878027280171714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,128,1,float16,fp8,0,2.0845813751220703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,float16,0,2.3743200302124023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,128,1,fp8,fp8,0,2.1143199602762857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,0,1,float16,fp8,0,2.3704479535420737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,float16,0,2.1317173639933267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,64,0,1,fp8,fp8,0,2.3203840255737305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,float16,0,2.4000107447306314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,128,1,float16,fp8,0,2.104778607686361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,128,1,fp8,fp8,0,2.0682719548543296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,0,1,float16,fp8,0,2.4146666526794434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,64,0,1,fp8,fp8,0,2.3319679896036782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,float16,0,1.1307466824849446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,float16,0,1.2977759838104248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,128,1,float16,fp8,0,1.112389326095581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,128,1,fp8,fp8,0,1.1442186832427979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,0,1,float16,fp8,0,1.282805363337199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,float16,0,1.0513439973195393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,64,0,1,fp8,fp8,0,1.2726506392161052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,float16,0,1.1943519910176594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,128,1,float16,fp8,0,1.051103989283244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,128,1,fp8,fp8,0,0.9840266704559326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,0,1,float16,fp8,0,1.192309300104777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,64,0,1,fp8,fp8,0,1.0988852977752686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,float16,0,1.0505332946777344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,float16,0,1.1997333367665608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,128,1,float16,fp8,0,1.0502506891886394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,128,1,fp8,fp8,0,1.0209973653157551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,0,1,fp8,fp8,0,1.1242612997690837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,64,0,1,float16,fp8,0,1.1962080001831055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,float16,0,1.059168020884196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,float16,0,1.208240032196045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,128,1,float16,fp8,0,1.0577600002288818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,128,1,fp8,fp8,0,1.0173973242441814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,0,1,float16,fp8,0,1.205674648284912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,float16,0,0.5737760066986084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,float16,0,0.6628373463948568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,128,1,float16,fp8,0,0.5643253326416016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,64,0,1,fp8,fp8,0,1.129093329111735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,128,1,fp8,fp8,0,0.5668533245722452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,0,1,float16,fp8,0,0.6515733400980631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,float16,0,0.5353920062383016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,64,0,1,fp8,fp8,0,0.6390506823857626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,float16,0,0.6064746777216593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,128,1,fp8,fp8,0,0.5001013278961182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,128,1,float16,fp8,0,0.5346453189849854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,0,1,float16,fp8,0,0.6082613468170166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,64,0,1,fp8,fp8,0,0.5607039928436279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,float16,0,0.5352319876352946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,float16,0,0.6090773344039917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,128,1,float16,fp8,0,0.5348693529764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,128,1,fp8,fp8,0,0.5009493430455526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,0,1,float16,fp8,0,0.6107039848963419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,float16,0,0.5404320160547892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,64,0,1,fp8,fp8,0,0.5624746481577555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,float16,0,0.6161706844965616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,128,1,float16,fp8,0,0.5397866566975912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,128,1,fp8,fp8,0,0.5113653341929117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,0,1,float16,fp8,0,0.615882674853007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,float16,0,0.300165335337321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,64,0,1,fp8,fp8,0,0.572223981221517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,float16,0,0.34559468428293866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,128,1,float16,fp8,0,0.2950399915377299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,128,1,fp8,fp8,0,0.2948639988899231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,0,1,float16,fp8,0,0.33958399295806885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,64,0,1,fp8,fp8,0,0.3288533290227254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,float16,0,0.277839998404185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,float16,0,0.3151893417040507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,128,1,float16,fp8,0,0.27647467454274494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,128,1,fp8,fp8,0,0.2613760034243266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,0,1,float16,fp8,0,0.31459200382232666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,64,0,1,fp8,fp8,0,0.29208000500996906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,float16,0,0.2791573405265808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,float16,0,0.31643199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,128,1,float16,fp8,0,0.2781706651051839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,128,1,fp8,fp8,0,0.26185067494710285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,0,1,float16,fp8,0,0.31642667452494305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,64,0,1,fp8,fp8,0,0.2929653326670329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,float16,0,0.2818613251050313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,float16,0,0.32042133808135986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,128,1,float16,fp8,0,0.2819360097249349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,128,1,fp8,fp8,0,0.266159991423289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,0,1,float16,fp8,0,0.31938133637110394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,float16,0,0.15965333580970764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,64,0,1,fp8,fp8,0,0.2971466581026713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,float16,0,0.1844693422317505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,128,1,float16,fp8,0,0.1588106652100881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,128,1,fp8,fp8,0,0.15804266929626465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,0,1,float16,fp8,0,0.18164799610773721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,64,0,1,fp8,fp8,0,0.1770133376121521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,float16,0,0.16706132888793945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,float16,0,0.14918399850527445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,128,1,float16,fp8,0,0.1455573340257009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,128,1,fp8,fp8,0,0.14010133345921835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,0,1,float16,fp8,0,0.16546133160591125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,64,0,1,fp8,fp8,0,0.15495999654134116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,float16,0,0.14633599917093912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,128,1,float16,fp8,0,0.1462559998035431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,float16,0,0.1673706571261088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,128,1,fp8,fp8,0,0.14028267065684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,0,1,float16,fp8,0,0.16566399733225504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,64,0,1,fp8,fp8,0,0.15680000185966492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,float16,0,0.14843733112017313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,float16,0,0.169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,128,1,float16,fp8,0,0.1495519975821177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,128,1,fp8,fp8,0,0.14269866545995077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,0,1,float16,fp8,0,0.1697653333346049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,float16,0,0.09041600426038106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,64,0,1,fp8,fp8,0,0.16033599774042764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,float16,0,0.10527466734250386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,128,1,float16,fp8,0,0.08890666564305623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,128,1,fp8,fp8,0,0.09106133381525676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,0,1,float16,fp8,0,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,64,0,1,fp8,fp8,0,0.10194133718808492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,float16,0,0.08169066905975342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,float16,0,0.09284800291061401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,128,1,float16,fp8,0,0.08256533245245616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,128,1,fp8,fp8,0,0.07557866473992665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,0,1,float16,fp8,0,0.09340799848238628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,64,0,1,fp8,fp8,0,0.08630933364232381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,float16,0,0.09292266766230266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,float16,0,0.08267733454704285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,128,1,float16,fp8,0,0.0820853312810262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,128,1,fp8,fp8,0,0.0765119989713033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,0,1,float16,fp8,0,0.09437867005666097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,64,0,1,fp8,fp8,0,0.08665066957473755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,float16,0,0.08361599842707317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,128,1,float16,fp8,0,0.08292800188064575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,float16,0,0.09483733773231506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,128,1,fp8,fp8,0,0.07853333155314128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,0,1,float16,fp8,0,0.09437333544095357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,64,0,1,fp8,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,float16,0,0.05217599868774414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,float16,0,0.06048533320426941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,128,1,float16,fp8,0,0.05242133140563965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,128,1,fp8,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,0,1,float16,fp8,0,0.059119999408721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,64,0,1,fp8,fp8,0,0.05710933109124502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,float16,0,0.05619733532269796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,128,1,fp8,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,0,1,float16,fp8,0,0.05795200169086456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,128,1,float16,fp8,0,0.05031466484069824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,64,0,1,fp8,fp8,0,0.054416000843048096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,float16,0,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,float16,0,0.058133333921432495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,128,1,float16,fp8,0,0.05045333504676819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,128,1,fp8,fp8,0,0.04855999847253164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,0,1,fp8,fp8,0,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,64,0,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,float16,0,0.05092266698678335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,128,1,fp8,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,float16,0,0.05783999959627787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,0,1,float16,fp8,0,0.056613331039746605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,64,0,1,fp8,fp8,0,0.053264002005259194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,float16,0,0.0365280012289683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,float16,0,0.04113066693147024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,128,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,128,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,0,1,float16,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,64,0,1,fp8,fp8,0,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,float16,0,0.040847999354203544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,128,1,float16,float16,0,0.036229332288106285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,128,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,0,1,float16,fp8,0,0.041221333046754204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,64,0,1,fp8,fp8,0,0.03845333307981491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,float16,0,0.036544000109036766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,float16,0,0.04027199993530909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,128,1,fp8,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,0,1,float16,fp8,0,0.04095999896526337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,float16,0,0.03590933233499527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,64,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,float16,0,0.0414986660083135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,128,1,float16,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,128,1,fp8,fp8,0,0.03425599883000056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,0,1,float16,fp8,0,0.04137066751718521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,64,0,1,fp8,fp8,0,0.03788266579310099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,0,1,float16,float16,0,2.0281599362691245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,128,1,float16,float16,0,2.020138740539551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,128,1,float16,fp8,0,2.0191680590311685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,128,1,fp8,fp8,0,1.9938186009724934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,0,1,float16,fp8,0,2.017850716908773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,64,0,1,fp8,fp8,0,1.9704532623291016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,128,1,float16,float16,0,2.0269227027893066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,0,1,float16,float16,0,2.0388107299804688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,128,1,float16,fp8,0,2.0320000648498535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,128,1,fp8,fp8,0,2.044874668121338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,0,1,float16,fp8,0,2.028005282084147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,64,0,1,fp8,fp8,0,2.0325172742207847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,128,1,float16,float16,0,2.0920373598734536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,128,1,float16,fp8,0,2.0314720471700034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,128,1,fp8,fp8,0,2.015557289123535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,0,1,float16,float16,0,2.0908586184183755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,0,1,float16,fp8,0,2.072330633799235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,128,1,float16,float16,0,1.0995786984761555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,0,1,float16,float16,0,1.1148640314737956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,128,1,float16,fp8,0,1.09225066502889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,64,0,1,fp8,fp8,0,2.030325412750244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,128,1,fp8,fp8,0,1.1022186279296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,0,1,float16,fp8,0,1.10917329788208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,64,0,1,fp8,fp8,0,1.1044533252716064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,128,1,float16,float16,0,1.020458698272705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,0,1,float16,float16,0,1.0205600261688232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,128,1,float16,fp8,0,1.020037333170573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,128,1,fp8,fp8,0,0.962826649347941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,0,1,float16,fp8,0,1.0212480227152507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,64,0,1,fp8,fp8,0,0.9552426338195801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,128,1,float16,float16,0,1.023637294769287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,0,1,float16,float16,0,1.0266773700714111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,128,1,float16,fp8,0,1.0209919611612956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,128,1,fp8,fp8,0,1.0082879861195881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,0,1,float16,fp8,0,1.0241386890411377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,64,0,1,fp8,fp8,0,1.0044106642405193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,128,1,float16,float16,0,1.0311253070831299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,0,1,float16,float16,0,1.0352319876352947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,128,1,float16,fp8,0,1.0296533107757568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,128,1,fp8,fp8,0,0.9925653139750162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,128,1,float16,float16,0,0.5602666536966959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,0,1,float16,fp8,0,1.0303146839141846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,0,1,float16,float16,0,0.5689173142115275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,64,0,1,fp8,fp8,0,0.9882293542226156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,128,1,float16,fp8,0,0.5523360172907511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,128,1,fp8,fp8,0,0.55458664894104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,0,1,float16,fp8,0,0.5587146679560343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,64,0,1,fp8,fp8,0,0.5547946691513062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,128,1,float16,float16,0,0.520794669787089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,0,1,float16,float16,0,0.5202826658884684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,128,1,float16,fp8,0,0.5191946824391683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,128,1,fp8,fp8,0,0.48737601439158124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,0,1,float16,fp8,0,0.5205599864323934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,64,0,1,fp8,fp8,0,0.4837813377380371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,0,1,float16,float16,0,0.5216586589813232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,128,1,float16,float16,0,0.5221013228098551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,128,1,fp8,fp8,0,0.48953600724538165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,128,1,float16,fp8,0,0.5206773281097412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,0,1,float16,fp8,0,0.5212106704711914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,64,0,1,fp8,fp8,0,0.4859679937362671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,128,1,float16,float16,0,0.525269349416097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,0,1,float16,float16,0,0.5292373498280843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,128,1,float16,fp8,0,0.5233333508173624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,128,1,fp8,fp8,0,0.49718932310740155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,0,1,fp8,fp8,0,0.49615466594696045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,128,1,float16,float16,0,0.29260265827178955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,64,0,1,float16,fp8,0,0.5259093443552653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,0,1,float16,float16,0,0.2979360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,128,1,float16,fp8,0,0.28829866647720337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,128,1,fp8,fp8,0,0.28755732377370197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,0,1,float16,fp8,0,0.2933013240496318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,64,0,1,fp8,fp8,0,0.2876426577568054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,128,1,float16,float16,0,0.26852800448735553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,0,1,float16,float16,0,0.2696106632550557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,128,1,float16,fp8,0,0.26895467440287274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,128,1,fp8,fp8,0,0.2532693346341451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,0,1,fp8,fp8,0,0.2507359981536865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,64,0,1,float16,fp8,0,0.26895467440287274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,128,1,float16,float16,0,0.26943467060724896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,0,1,float16,float16,0,0.2720586657524109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,128,1,fp8,fp8,0,0.25456533829371136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,128,1,float16,fp8,0,0.27078400055567425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,0,1,float16,fp8,0,0.27111466725667316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,64,0,1,fp8,fp8,0,0.25201600790023804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,128,1,float16,float16,0,0.2733493248621623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,128,1,float16,fp8,0,0.27339200178782147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,0,1,float16,float16,0,0.27427732944488525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,128,1,fp8,fp8,0,0.25813867648442584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,128,1,float16,float16,0,0.15711999932924905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,0,1,fp8,fp8,0,0.2570880055427551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,64,0,1,float16,fp8,0,0.27503466606140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,0,1,float16,float16,0,0.1579253375530243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,128,1,float16,fp8,0,0.15565866231918335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,128,1,fp8,fp8,0,0.1551413337389628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,0,1,float16,fp8,0,0.15667733550071716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,64,0,1,fp8,fp8,0,0.1553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,128,1,float16,float16,0,0.1423893372217814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,0,1,float16,float16,0,0.1432213286558787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,128,1,float16,fp8,0,0.14333333571751913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,128,1,fp8,fp8,0,0.13549333810806274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,0,1,float16,fp8,0,0.14273066322008768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,128,1,float16,float16,0,0.14406933387120566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,64,0,1,fp8,fp8,0,0.1350986659526825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,0,1,float16,float16,0,0.14269866545995077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,128,1,float16,fp8,0,0.1437066694100698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,128,1,fp8,fp8,0,0.13662399848302206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,0,1,float16,fp8,0,0.14406399925549826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,64,0,1,fp8,fp8,0,0.13499733805656433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,128,1,float16,float16,0,0.14551466703414917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,0,1,float16,float16,0,0.14573333660761514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,128,1,float16,fp8,0,0.14455466469128928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,0,1,float16,fp8,0,0.1453493336836497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,128,1,fp8,fp8,0,0.13959999879201254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,64,0,1,fp8,fp8,0,0.13792533675829569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,128,1,float16,float16,0,0.08899733424186707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,0,1,float16,float16,0,0.08949333429336548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,128,1,float16,fp8,0,0.08726400136947632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,128,1,fp8,fp8,0,0.0904853343963623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,0,1,float16,fp8,0,0.08767466743787129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,64,0,1,fp8,fp8,0,0.09038399656613667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,128,1,float16,float16,0,0.08003733555475871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,0,1,float16,float16,0,0.0794239987929662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,128,1,float16,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,128,1,fp8,fp8,0,0.07461866736412048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,0,1,fp8,fp8,0,0.0747680018345515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,64,0,1,float16,fp8,0,0.08092799782752991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,128,1,float16,float16,0,0.08113066852092743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,0,1,float16,float16,0,0.08057066798210144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,128,1,fp8,fp8,0,0.07601066430409749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,128,1,float16,fp8,0,0.08183999856313069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,0,1,float16,fp8,0,0.08083733419577281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,64,0,1,fp8,fp8,0,0.07514133552710216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,128,1,float16,float16,0,0.08144000172615051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,0,1,float16,float16,0,0.08101333181063335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,128,1,float16,fp8,0,0.08175466458002727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,128,1,fp8,fp8,0,0.07675200204054515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,0,1,float16,fp8,0,0.08101866642634074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,64,0,1,fp8,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,128,1,float16,float16,0,0.052000001072883606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,0,1,float16,float16,0,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,128,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,128,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,0,1,float16,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,64,0,1,fp8,fp8,0,0.0481333335240682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,0,1,float16,float16,0,0.04974933465321859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,128,1,float16,float16,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,128,1,float16,fp8,0,0.048858667413393654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,128,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,0,1,fp8,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,64,0,1,float16,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,128,1,float16,float16,0,0.048800001541773476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,0,1,float16,float16,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,128,1,float16,fp8,0,0.04837866624196371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,128,1,fp8,fp8,0,0.04683200021584829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,0,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,64,0,1,fp8,fp8,0,0.046351999044418335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,128,1,float16,float16,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,0,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,128,1,float16,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,128,1,fp8,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,0,1,float16,fp8,0,0.04945066571235657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,128,1,float16,float16,0,0.03602666656176249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,64,0,1,fp8,fp8,0,0.046394666035970054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,0,1,float16,float16,0,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,128,1,float16,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,128,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,0,1,float16,fp8,0,0.03611200054486593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,64,0,1,fp8,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,128,1,float16,float16,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,0,1,float16,float16,0,0.035258665680885315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,128,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,128,1,float16,fp8,0,0.03603733330965042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,64,0,1,fp8,fp8,0,0.03312533348798752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,128,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,0,1,float16,float16,0,0.034917332231998444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,128,1,float16,fp8,0,0.03483733286460241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,128,1,fp8,fp8,0,0.033173332611719765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,0,1,float16,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,64,0,1,fp8,fp8,0,0.03266133368015289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,128,1,float16,float16,0,0.03482666611671448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,128,1,float16,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,128,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,0,1,float16,fp8,0,0.035061334570248924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,0,1,float16,float16,0,0.034703999757766724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,64,0,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,0,1,float16,float16,0,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,128,1,float16,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,128,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,64,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,128,1,float16,float16,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,128,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,64,0,1,fp8,fp8,0,0.024336000283559162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,0,1,float16,float16,0,0.023728000621000927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,128,1,float16,float16,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,128,1,float16,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,128,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,0,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,128,1,float16,float16,0,0.023728000621000927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,128,1,float16,fp8,0,0.02496533344189326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,128,1,fp8,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,128,1,float16,float16,0,0.9477919737497965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,0,1,float16,float16,0,0.9265440305074056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,128,1,float16,fp8,0,0.9459359645843506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,128,1,fp8,fp8,0,0.9014933109283447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,0,1,fp8,fp8,0,0.8812586466471354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,64,0,1,float16,fp8,0,0.9242239793141683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,128,1,float16,float16,0,0.949669361114502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,0,1,float16,float16,0,0.9310773213704427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,128,1,float16,fp8,0,0.9484586715698242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,128,1,fp8,fp8,0,0.9478987058003744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,0,1,float16,fp8,0,0.929423967997233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,64,0,1,fp8,fp8,0,0.9168000221252441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,0,1,float16,float16,0,0.9427893161773682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,128,1,float16,float16,0,0.9677066802978516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,128,1,float16,fp8,0,0.9619200229644775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,128,1,float16,float16,0,0.5280479987462362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,128,1,fp8,fp8,0,0.9360053539276123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,0,1,fp8,fp8,0,0.8964107036590576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,64,0,1,float16,fp8,0,0.9364746411641439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,0,1,float16,float16,0,0.5170720020929972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,128,1,float16,fp8,0,0.5178186496098837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,128,1,fp8,fp8,0,0.5263146559397379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,0,1,float16,fp8,0,0.5069973468780518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,64,0,1,fp8,fp8,0,0.5081386566162109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,0,1,float16,float16,0,0.4718773365020752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,128,1,float16,fp8,0,0.48158931732177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,128,1,float16,float16,0,0.48286934693654376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,128,1,fp8,fp8,0,0.45426666736602783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,0,1,fp8,fp8,0,0.43943464756011963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,64,0,1,float16,fp8,0,0.46854400634765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,128,1,float16,float16,0,0.48557865619659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,0,1,float16,float16,0,0.474021315574646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,128,1,float16,fp8,0,0.4835946559906006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,128,1,fp8,fp8,0,0.45881064732869464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,0,1,float16,fp8,0,0.47387198607126874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,64,0,1,fp8,fp8,0,0.4434719880421956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,0,1,float16,float16,0,0.4811306794484456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,128,1,float16,float16,0,0.4904586474100749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,128,1,float16,fp8,0,0.4910026788711548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,128,1,fp8,fp8,0,0.465178648630778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,128,1,float16,float16,0,0.27562665939331055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,0,1,fp8,fp8,0,0.44711466630299884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,64,0,1,float16,fp8,0,0.4787626663843791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,0,1,float16,float16,0,0.27134933074315387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,128,1,float16,fp8,0,0.27145065863927204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,128,1,fp8,fp8,0,0.27297600110371906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,0,1,float16,fp8,0,0.26474666595458984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,128,1,float16,float16,0,0.25218133131663006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,64,0,1,fp8,fp8,0,0.26470400889714557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,0,1,float16,float16,0,0.24637333552042642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,128,1,float16,fp8,0,0.25013333559036255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,128,1,fp8,fp8,0,0.2367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,0,1,float16,fp8,0,0.24550400177637735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,64,0,1,fp8,fp8,0,0.2283786733945211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,128,1,float16,float16,0,0.25229867299397785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,0,1,float16,float16,0,0.24821333090464273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,128,1,fp8,fp8,0,0.23915199438730875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,0,1,float16,fp8,0,0.24566932519276938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,128,1,float16,fp8,0,0.25229867299397785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,64,0,1,fp8,fp8,0,0.2306613326072693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,128,1,float16,float16,0,0.25756265719731647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,0,1,float16,float16,0,0.2518186569213867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,128,1,float16,fp8,0,0.2548639973004659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,128,1,fp8,fp8,0,0.24237332741419473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,128,1,float16,float16,0,0.1474399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,0,1,float16,fp8,0,0.24924800793329874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,64,0,1,fp8,fp8,0,0.23381867011388144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,0,1,float16,float16,0,0.1444000005722046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,128,1,fp8,fp8,0,0.14798933267593384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,128,1,float16,fp8,0,0.14563199877738953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,0,1,float16,fp8,0,0.14363732933998108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,64,0,1,fp8,fp8,0,0.14364266395568848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,128,1,float16,float16,0,0.13492266337076822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,0,1,float16,float16,0,0.1321333348751068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,128,1,float16,fp8,0,0.13434132933616638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,128,1,fp8,fp8,0,0.12660800417264303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,0,1,float16,fp8,0,0.13174933195114136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,64,0,1,fp8,fp8,0,0.12211733063062032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,128,1,float16,float16,0,0.1362986663977305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,0,1,float16,float16,0,0.13196266690889993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,128,1,float16,fp8,0,0.1344106694062551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,128,1,fp8,fp8,0,0.12803199887275696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,0,1,float16,fp8,0,0.1325386663277944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,64,0,1,fp8,fp8,0,0.12369599938392639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,0,1,float16,float16,0,0.13395200173060098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,128,1,float16,float16,0,0.13702399532000223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,128,1,float16,fp8,0,0.1385546624660492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,128,1,fp8,fp8,0,0.13127999504407248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,0,1,float16,fp8,0,0.1328373352686564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,64,0,1,fp8,fp8,0,0.12642666697502136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,128,1,float16,float16,0,0.08548800150553386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,0,1,float16,float16,0,0.08290133376916249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,128,1,float16,fp8,0,0.0827946662902832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,0,1,float16,fp8,0,0.0827466646830241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,128,1,fp8,fp8,0,0.08661866188049316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,128,1,float16,float16,0,0.07721066474914551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,0,1,float16,float16,0,0.07438399891058604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,64,0,1,fp8,fp8,0,0.08316799998283386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,128,1,float16,fp8,0,0.07814933359622955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,128,1,fp8,fp8,0,0.0718506673971812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,0,1,float16,fp8,0,0.07563200096289317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,64,0,1,fp8,fp8,0,0.07014933228492737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,128,1,float16,float16,0,0.07707733412583669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,0,1,float16,float16,0,0.07496533294518788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,128,1,float16,fp8,0,0.07700799902280171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,128,1,fp8,fp8,0,0.07261866827805837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,0,1,float16,fp8,0,0.07488533357779185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,64,0,1,fp8,fp8,0,0.06869866450627644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,128,1,float16,float16,0,0.07800533374150594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,0,1,float16,float16,0,0.0757066657145818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,128,1,float16,fp8,0,0.0773119976123174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,128,1,fp8,fp8,0,0.07301866511503856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,0,1,float16,fp8,0,0.07521600027879079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,128,1,float16,float16,0,0.04797866443792979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,64,0,1,fp8,fp8,0,0.07066666583220164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,128,1,float16,fp8,0,0.0481279989083608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,0,1,float16,float16,0,0.047295997540156044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,128,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,0,1,float16,fp8,0,0.04693333307902018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,128,1,float16,float16,0,0.04675200084845225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,64,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,0,1,float16,float16,0,0.04423466821511587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,128,1,float16,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,128,1,fp8,fp8,0,0.04401599864164988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,0,1,float16,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,64,0,1,fp8,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,128,1,float16,float16,0,0.04628799855709076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,0,1,float16,float16,0,0.04608533283074697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,128,1,float16,fp8,0,0.046816001335779824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,128,1,fp8,fp8,0,0.04393066465854645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,0,1,float16,fp8,0,0.04609066744645437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,64,0,1,fp8,fp8,0,0.04334400097529093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,0,1,float16,float16,0,0.045642669002215065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,128,1,float16,fp8,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,128,1,float16,float16,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,128,1,fp8,fp8,0,0.04404266675313314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,0,1,float16,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,64,0,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,128,1,float16,float16,0,0.034373333056767784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,0,1,float16,float16,0,0.03342399994532267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,128,1,fp8,fp8,0,0.03486400097608566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,0,1,float16,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,64,0,1,fp8,fp8,0,0.03425066669782003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,128,1,float16,float16,0,0.034143999218940735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,0,1,float16,float16,0,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,128,1,fp8,fp8,0,0.03179199993610382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,0,1,float16,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,128,1,float16,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,64,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,128,1,float16,float16,0,0.033733333150545754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,0,1,float16,float16,0,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,128,1,fp8,fp8,0,0.03258133431275686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,0,1,float16,fp8,0,0.03206400076548258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,64,0,1,fp8,fp8,0,0.031194667021433514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,128,1,float16,float16,0,0.034373333056767784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,0,1,float16,float16,0,0.03356799980004629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,128,1,float16,fp8,0,0.03490666548411051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,128,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,64,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,128,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,64,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,128,1,float16,float16,0,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,0,1,float16,float16,0,0.022357332209746044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,128,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,0,1,fp8,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,64,0,1,float16,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,0,1,float16,float16,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,128,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,128,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,0,1,float16,fp8,0,0.023962666591008503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,64,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,0,1,float16,float16,0,0.023002666731675465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,128,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,0,1,fp8,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,64,0,1,float16,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,128,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,0,1,float16,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,64,0,1,fp8,fp8,0,0.019802667200565338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,128,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,0,1,float16,float16,0,0.019866666446129482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,128,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,0,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,128,1,fp8,fp8,0,0.020117333779732387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,64,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,128,1,float16,float16,0,0.01979200045267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,128,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,0,1,float16,fp8,0,0.02027733375628789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,128,1,float16,float16,0,0.5008746782938639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,128,1,float16,fp8,0,0.498032013575236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,0,1,float16,float16,0,0.5004053513209025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,128,1,fp8,fp8,0,0.47809068361918133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,0,1,float16,fp8,0,0.49798933664957684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,64,0,1,fp8,fp8,0,0.4772160053253174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,128,1,float16,float16,0,0.5022133191426595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,0,1,float16,float16,0,0.5026293198267618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,128,1,float16,fp8,0,0.5023200114568075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,128,1,fp8,fp8,0,0.48840534687042236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,0,1,float16,fp8,0,0.5001013278961182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,64,0,1,fp8,fp8,0,0.48770666122436523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,128,1,float16,float16,0,0.5089600086212158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,0,1,float16,float16,0,0.5090239842732748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,128,1,float16,fp8,0,0.5056106646855673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,128,1,fp8,fp8,0,0.4919946591059367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,0,1,float16,fp8,0,0.505791982014974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,64,0,1,fp8,fp8,0,0.4914506673812866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,128,1,float16,float16,0,0.28199466069539386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,0,1,float16,float16,0,0.2826026678085327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,128,1,float16,fp8,0,0.27667733033498126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,0,1,float16,fp8,0,0.2774239977200826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,128,1,fp8,fp8,0,0.28217599789301556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,64,0,1,fp8,fp8,0,0.28180267413457233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,128,1,float16,float16,0,0.25806933641433716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,0,1,float16,float16,0,0.26078933477401733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,128,1,float16,fp8,0,0.2585973342259725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,128,1,fp8,fp8,0,0.24759467442830405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,0,1,float16,fp8,0,0.2585653265317281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,64,0,1,fp8,fp8,0,0.24529600143432617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,128,1,float16,float16,0,0.25945067405700684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,0,1,float16,float16,0,0.259552001953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,128,1,float16,fp8,0,0.25918400287628174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,128,1,fp8,fp8,0,0.2502826650937398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,0,1,fp8,fp8,0,0.25012799104054767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,64,0,1,float16,fp8,0,0.25890133778254193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,128,1,float16,float16,0,0.2632586757342021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,0,1,float16,float16,0,0.2637386719385783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,128,1,float16,fp8,0,0.2614933252334595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,128,1,fp8,fp8,0,0.25387199719746906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,0,1,float16,fp8,0,0.2605546712875366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,128,1,float16,float16,0,0.15029866496721903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,64,0,1,fp8,fp8,0,0.252949337164561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,0,1,float16,float16,0,0.15001599987347922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,128,1,float16,fp8,0,0.1465013325214386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,128,1,fp8,fp8,0,0.150629331668218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,0,1,float16,fp8,0,0.14848533272743225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,128,1,float16,float16,0,0.13690132896105447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,64,0,1,fp8,fp8,0,0.15109333395957947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,0,1,float16,float16,0,0.13834666212399802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,128,1,float16,fp8,0,0.1389226714769999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,128,1,fp8,fp8,0,0.1311360001564026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,0,1,float16,fp8,0,0.13822399576505026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,64,0,1,fp8,fp8,0,0.1312266687552134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,128,1,float16,float16,0,0.13886400063832602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,0,1,float16,float16,0,0.13838932911554971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,128,1,float16,fp8,0,0.13738133509953818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,128,1,fp8,fp8,0,0.13242666920026144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,0,1,float16,fp8,0,0.1381600002447764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,64,0,1,fp8,fp8,0,0.13319466511408487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,128,1,float16,float16,0,0.14231466253598532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,0,1,float16,float16,0,0.14075733224550882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,128,1,float16,fp8,0,0.1400053302447001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,128,1,fp8,fp8,0,0.1356160044670105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,0,1,float16,fp8,0,0.1411786675453186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,128,1,float16,float16,0,0.0845973292986552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,0,1,float16,float16,0,0.0844640036424001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,64,0,1,fp8,fp8,0,0.1358453333377838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,128,1,float16,fp8,0,0.08390933275222778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,0,1,float16,fp8,0,0.08340266346931458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,0,1,fp8,fp8,0,0.08742400010426839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,64,128,1,fp8,fp8,0,0.08719467123349507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,128,1,float16,float16,0,0.0766186664501826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,0,1,float16,float16,0,0.07693333427111308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,128,1,float16,fp8,0,0.07685866455237071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,128,1,fp8,fp8,0,0.07293866574764252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,0,1,float16,fp8,0,0.07845866680145264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,64,0,1,fp8,fp8,0,0.07295999924341838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,128,1,float16,float16,0,0.07739733159542084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,0,1,float16,float16,0,0.07861333092053731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,128,1,float16,fp8,0,0.07690666615962982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,128,1,fp8,fp8,0,0.07295999924341838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,0,1,float16,fp8,0,0.0767626663049062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,64,0,1,fp8,fp8,0,0.07261333366235097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,128,1,float16,float16,0,0.07895466685295105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,0,1,float16,float16,0,0.07772266864776611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,128,1,float16,fp8,0,0.07854400078455608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,0,1,float16,fp8,0,0.07871466875076294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,128,1,fp8,fp8,0,0.07449066638946533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,64,0,1,fp8,fp8,0,0.07438399891058604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,128,1,float16,float16,0,0.04967466493447622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,0,1,float16,float16,0,0.04997866849104563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,128,1,float16,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,128,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,64,0,1,fp8,fp8,0,0.04948799808820089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,0,1,float16,float16,0,0.0476800004641215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,128,1,float16,fp8,0,0.04645866652329763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,128,1,fp8,fp8,0,0.04359999795754751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,0,1,float16,fp8,0,0.047637333472569786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,64,0,1,fp8,fp8,0,0.044213334719340004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,128,1,float16,float16,0,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,0,1,float16,float16,0,0.047637333472569786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,128,1,float16,fp8,0,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,128,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,0,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,128,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,64,0,1,fp8,fp8,0,0.044351999958356224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,0,1,float16,float16,0,0.047466665506362915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,128,1,float16,fp8,0,0.047695999344189964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,0,1,float16,fp8,0,0.0468800018231074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,128,1,fp8,fp8,0,0.04598399996757507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,64,0,1,fp8,fp8,0,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,128,1,float16,float16,0,0.03298133363326391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,0,1,float16,float16,0,0.033157333731651306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,128,1,float16,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,128,1,fp8,fp8,0,0.033029332756996155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,64,0,1,fp8,fp8,0,0.03251733382542928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,128,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,0,1,float16,float16,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,128,1,float16,fp8,0,0.03133866687615713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,128,1,fp8,fp8,0,0.030581332743167877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,0,1,float16,fp8,0,0.03183999905983607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,64,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,128,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,128,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,128,1,fp8,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,0,1,fp8,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,64,0,1,float16,fp8,0,0.032074667513370514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,128,1,float16,float16,0,0.03254933406909307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,128,1,fp8,fp8,0,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,128,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,0,1,float16,float16,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,0,1,float16,fp8,0,0.031888000667095184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,64,0,1,fp8,fp8,0,0.030394665896892548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,128,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,0,1,float16,float16,0,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,128,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,64,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,128,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,128,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,64,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,0,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,128,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,0,1,fp8,fp8,0,0.02170666555563609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,64,128,1,fp8,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,0,1,float16,float16,0,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,128,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,128,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,64,0,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,128,1,fp8,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,0,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,64,0,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,128,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,0,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,128,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,128,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,0,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,64,0,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,128,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,128,1,float16,fp8,0,0.01855466639002164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,0,1,fp8,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,64,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,128,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,128,1,float16,float16,0,0.016309333344300587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,128,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,128,1,float16,float16,0,0.3426933288574219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,0,1,float16,float16,0,0.3415679931640625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,128,1,float16,fp8,0,0.34012798468271893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,128,1,fp8,fp8,0,0.320853332678477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,0,1,float16,fp8,0,0.34001068274180096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,64,0,1,fp8,fp8,0,0.3205173412958781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,128,1,float16,float16,0,0.341264009475708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,0,1,float16,float16,0,0.3407520055770874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,128,1,float16,fp8,0,0.3413439989089966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,128,1,fp8,fp8,0,0.32361066341400146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,0,1,float16,fp8,0,0.33965333302815753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,64,0,1,fp8,fp8,0,0.3251840074857076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,128,1,float16,float16,0,0.34565865993499756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,0,1,float16,float16,0,0.345797340075175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,128,1,float16,fp8,0,0.3433599869410197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,128,1,fp8,fp8,0,0.325658659140269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,0,1,float16,fp8,0,0.3436906735102336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,64,0,1,fp8,fp8,0,0.32607465982437134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,128,1,float16,float16,0,0.1895893414815267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,128,1,float16,fp8,0,0.187226672967275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,0,1,float16,float16,0,0.18980799118677774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,128,1,fp8,fp8,0,0.18773333231608072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,0,1,float16,fp8,0,0.1885706583658854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,64,0,1,fp8,fp8,0,0.1860640048980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,128,1,float16,float16,0,0.17947200934092203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,0,1,float16,float16,0,0.17881067593892416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,128,1,float16,fp8,0,0.17890665928522745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,128,1,fp8,fp8,0,0.1665546695391337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,0,1,float16,fp8,0,0.17803732554117838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,64,0,1,fp8,fp8,0,0.16747732957204184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,128,1,float16,float16,0,0.17918399969736734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,0,1,float16,float16,0,0.17734932899475098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,128,1,float16,fp8,0,0.1773866613705953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,128,1,fp8,fp8,0,0.1693333387374878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,0,1,float16,fp8,0,0.17847466468811035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,64,0,1,fp8,fp8,0,0.16970133781433105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,128,1,float16,float16,0,0.18068800369898477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,0,1,float16,float16,0,0.18066134055455527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,128,1,float16,fp8,0,0.17966399590174356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,128,1,fp8,fp8,0,0.17100266615549722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,0,1,float16,fp8,0,0.17982399463653564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,128,1,float16,float16,0,0.1042080024878184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,0,1,float16,float16,0,0.10520533720652263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,64,0,1,fp8,fp8,0,0.17132800817489624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,128,1,float16,fp8,0,0.10523200035095215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,128,1,fp8,fp8,0,0.1049013336499532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,0,1,float16,fp8,0,0.10410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,128,1,float16,float16,0,0.09851200381914775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,64,0,1,fp8,fp8,0,0.10403733452161153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,0,1,float16,float16,0,0.09805867075920105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,128,1,float16,fp8,0,0.09685333569844563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,128,1,fp8,fp8,0,0.09092799822489421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,0,1,float16,fp8,0,0.0976693332195282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,64,0,1,fp8,fp8,0,0.09178133805592854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,128,1,float16,float16,0,0.09731733798980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,0,1,float16,float16,0,0.0979200005531311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,128,1,float16,fp8,0,0.09715200463930766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,128,1,fp8,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,0,1,float16,fp8,0,0.09713600079218547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,64,0,1,fp8,fp8,0,0.09070400396982829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,128,1,float16,float16,0,0.09917333722114563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,128,1,fp8,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,0,1,float16,float16,0,0.09801066915194194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,128,1,float16,fp8,0,0.09734400113423665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,0,1,float16,fp8,0,0.09936533371607463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,128,1,float16,float16,0,0.05947199960549673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,64,0,1,fp8,fp8,0,0.09087466200192769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,0,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,128,1,float16,fp8,0,0.058378666639328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,0,1,fp8,fp8,0,0.056517332792282104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,64,0,1,float16,fp8,0,0.05819199979305267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,128,1,float16,float16,0,0.056602666775385536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,0,1,float16,float16,0,0.056277334690093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,128,1,fp8,fp8,0,0.05384000142415365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,128,1,float16,fp8,0,0.056736002365748085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,0,1,float16,fp8,0,0.05691733459631602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,64,0,1,fp8,fp8,0,0.05300266544024149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,128,1,float16,float16,0,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,0,1,float16,float16,0,0.05663999915122986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,128,1,float16,fp8,0,0.055904000997543335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,128,1,fp8,fp8,0,0.054197331269582115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,0,1,float16,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,64,0,1,fp8,fp8,0,0.053770666321118675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,128,1,float16,float16,0,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,0,1,float16,float16,0,0.05769066512584686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,128,1,float16,fp8,0,0.05761066575845083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,128,1,fp8,fp8,0,0.05372266471385956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,0,1,float16,fp8,0,0.05671466886997223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,64,0,1,fp8,fp8,0,0.054101333022117615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,128,1,float16,float16,0,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,128,1,float16,fp8,0,0.039450667798519135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,0,1,float16,fp8,0,0.038218667109807335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,64,0,1,fp8,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,128,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,128,1,float16,fp8,0,0.03645866612593333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,0,1,float16,float16,0,0.03821333249409994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,128,1,fp8,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,0,1,float16,fp8,0,0.037087999284267426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,64,0,1,fp8,fp8,0,0.03626133253177007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,128,1,float16,float16,0,0.03633599976698557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,0,1,float16,float16,0,0.03687999894221624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,128,1,float16,fp8,0,0.03812266637881597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,128,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,0,1,float16,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,64,0,1,fp8,fp8,0,0.036271999279658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,128,1,float16,float16,0,0.0372533326347669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,0,1,float16,float16,0,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,128,1,float16,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,128,1,fp8,fp8,0,0.03640533238649368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,0,1,float16,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,128,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,64,0,1,fp8,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,128,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,64,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,128,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,128,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,0,1,float16,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,64,0,1,fp8,fp8,0,0.02497066557407379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,64,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,128,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,128,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,128,1,float16,float16,0,0.020282667130231857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,128,1,float16,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,128,1,fp8,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,64,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,128,1,float16,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,0,1,float16,fp8,0,0.020645332833131153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,64,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,0,1,float16,float16,0,0.01970133309563001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,128,1,float16,float16,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,128,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,128,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,0,1,fp8,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,128,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,128,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,0,1,float16,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,128,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,128,1,float16,float16,0,0.017717332889636356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,64,0,1,fp8,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,0,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,128,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,128,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,128,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,128,1,fp8,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,128,1,float16,float16,0,0.01643199970324834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,128,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,128,1,float16,float16,0,0.26362133026123047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,0,1,float16,float16,0,0.263589342435201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,128,1,float16,fp8,0,0.2622613310813904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,128,1,fp8,fp8,0,0.24622933069864908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,0,1,fp8,fp8,0,0.24496533473332724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,64,0,1,float16,fp8,0,0.2630773385365804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,128,1,float16,float16,0,0.2630666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,0,1,float16,float16,0,0.2632906635602315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,128,1,float16,fp8,0,0.26126400629679364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,128,1,fp8,fp8,0,0.24766933917999268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,0,1,float16,fp8,0,0.26288533210754395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,64,0,1,fp8,fp8,0,0.24783466259638467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,128,1,float16,float16,0,0.26522666215896606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,0,1,float16,float16,0,0.264575997988383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,128,1,float16,fp8,0,0.2638933261235555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,128,1,fp8,fp8,0,0.2518133322397868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,0,1,float16,fp8,0,0.2630079984664917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,64,0,1,fp8,fp8,0,0.249071995417277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,0,1,float16,float16,0,0.14681599537531534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,128,1,float16,float16,0,0.14692800243695578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,128,1,float16,fp8,0,0.1442506710688273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,128,1,fp8,fp8,0,0.1444960037867228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,0,1,float16,fp8,0,0.14684266845385233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,128,1,float16,float16,0,0.13875733812650046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,64,0,1,fp8,fp8,0,0.14427733421325684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,0,1,float16,float16,0,0.1399999956289927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,128,1,float16,fp8,0,0.14046399792035422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,128,1,fp8,fp8,0,0.13013333082199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,0,1,float16,fp8,0,0.13878933588663736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,64,0,1,fp8,fp8,0,0.13012267152468363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,128,1,float16,float16,0,0.13974400361378989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,0,1,float16,float16,0,0.13970667123794556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,128,1,fp8,fp8,0,0.13005333145459494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,128,1,float16,fp8,0,0.14037866393725076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,0,1,float16,fp8,0,0.1384160021940867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,64,0,1,fp8,fp8,0,0.13025066256523132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,128,1,float16,float16,0,0.14096533258756003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,0,1,float16,float16,0,0.14086932937304178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,128,1,float16,fp8,0,0.13919466733932495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,128,1,fp8,fp8,0,0.13026666641235352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,128,1,float16,float16,0,0.07859733204046886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,0,1,float16,float16,0,0.07858133316040039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,0,1,fp8,fp8,0,0.13102933764457703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,64,0,1,float16,fp8,0,0.1405280033747355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,128,1,float16,fp8,0,0.07909866670767467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,128,1,fp8,fp8,0,0.07580799857775371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,0,1,float16,fp8,0,0.07868266602357228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,64,0,1,fp8,fp8,0,0.07689600189526875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,128,1,float16,float16,0,0.07814399898052216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,0,1,float16,float16,0,0.07682666679223378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,128,1,float16,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,128,1,fp8,fp8,0,0.073594664533933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,64,0,1,fp8,fp8,0,0.07387199997901917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,128,1,float16,float16,0,0.0767680009206136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,0,1,float16,float16,0,0.07701333363850911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,128,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,128,1,fp8,fp8,0,0.07255466779073079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,0,1,float16,fp8,0,0.07709333300590515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,64,0,1,fp8,fp8,0,0.07417599856853485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,128,1,float16,float16,0,0.07642666498819987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,128,1,float16,fp8,0,0.07799466451009114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,0,1,float16,float16,0,0.07694399853547414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,128,1,fp8,fp8,0,0.07388266424338023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,0,1,float16,fp8,0,0.07853333155314128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,64,0,1,fp8,fp8,0,0.07356266677379608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,128,1,float16,float16,0,0.048112000028292336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,0,1,float16,float16,0,0.04951466619968414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,128,1,float16,fp8,0,0.04865066707134247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,128,1,fp8,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,0,1,float16,fp8,0,0.048432002464930214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,64,0,1,fp8,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,128,1,float16,float16,0,0.048010667165120445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,0,1,float16,float16,0,0.04743466774622599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,128,1,float16,fp8,0,0.04702933132648468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,128,1,fp8,fp8,0,0.04549333453178406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,0,1,float16,fp8,0,0.0470719983180364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,128,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,64,0,1,fp8,fp8,0,0.04550399879614512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,0,1,float16,float16,0,0.047456001242001854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,128,1,float16,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,0,1,float16,fp8,0,0.04756799836953481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,128,1,fp8,fp8,0,0.046154667933781944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,64,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,128,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,128,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,128,1,fp8,fp8,0,0.04660800099372864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,0,1,float16,fp8,0,0.047322665651639305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,128,1,float16,float16,0,0.03164800008138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,64,0,1,fp8,fp8,0,0.04529066880544027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,0,1,float16,float16,0,0.03186133255561193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,128,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,128,1,fp8,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,0,1,float16,fp8,0,0.03197866678237915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,128,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,0,1,float16,float16,0,0.03200533241033554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,128,1,float16,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,0,1,fp8,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,64,0,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,128,1,float16,float16,0,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,128,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,128,1,float16,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,64,0,1,fp8,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,128,1,float16,float16,0,0.030282666285832722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,0,1,float16,float16,0,0.031290667752424874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,128,1,float16,fp8,0,0.030623999734719593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,128,1,fp8,fp8,0,0.02978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,0,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,64,0,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,128,1,float16,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,128,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,0,1,float16,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,128,1,float16,fp8,0,0.022389332453409832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,64,0,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,128,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,128,1,float16,fp8,0,0.02237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,64,0,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,128,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,128,1,float16,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,0,1,float16,fp8,0,0.022821334501107533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,64,0,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,128,1,float16,float16,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,0,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,64,0,1,fp8,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,128,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,64,0,1,fp8,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,128,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,0,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,64,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,128,1,float16,float16,0,0.017722666263580322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,128,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,128,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,0,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,64,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,128,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,128,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,64,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,128,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,128,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,0,1,float16,float16,0,0.01613333324591319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,128,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,64,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,128,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,128,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,0,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,64,0,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,64,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,0,1,float16,fp8,0,0.016704000532627106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,64,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,128,1,float16,float16,0,0.22784000635147095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,0,1,float16,float16,0,0.22850666443506876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,128,1,float16,fp8,0,0.22717867294947305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,128,1,fp8,fp8,0,0.21025600035985312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,0,1,fp8,fp8,0,0.21010132630666098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,64,0,1,float16,fp8,0,0.22829333941141763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,128,1,float16,float16,0,0.2283466657002767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,128,1,float16,fp8,0,0.22805333137512207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,0,1,float16,float16,0,0.22718934218088785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,128,1,fp8,fp8,0,0.2120479941368103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,0,1,fp8,fp8,0,0.2100693384806315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,64,0,1,float16,fp8,0,0.22791999578475952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,128,1,float16,float16,0,0.2288960019747416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,0,1,float16,float16,0,0.22919466098149618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,128,1,float16,fp8,0,0.23035200436909994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,128,1,fp8,fp8,0,0.21152534087498984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,0,1,fp8,fp8,0,0.2117919921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,64,0,1,float16,fp8,0,0.22914133469263712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,128,1,float16,float16,0,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,0,1,float16,float16,0,0.1237440009911855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,128,1,float16,fp8,0,0.12362666924794515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,128,1,fp8,fp8,0,0.11758933464686076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,0,1,float16,fp8,0,0.12381333112716675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,64,0,1,fp8,fp8,0,0.11781866351763408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,128,1,float16,float16,0,0.122079998254776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,128,1,float16,fp8,0,0.12051733334859212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,128,1,fp8,fp8,0,0.1136853297551473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,0,1,float16,float16,0,0.12125866611798604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,0,1,float16,fp8,0,0.1216159959634145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,64,0,1,fp8,fp8,0,0.11294399698575337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,128,1,float16,float16,0,0.12063466509183247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,128,1,float16,fp8,0,0.11993066469828288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,0,1,float16,float16,0,0.1202239990234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,0,1,float16,fp8,0,0.12198399504025777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,128,1,fp8,fp8,0,0.11356266339619954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,64,0,1,fp8,fp8,0,0.11327999830245972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,128,1,float16,float16,0,0.12201066811879475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,0,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,128,1,float16,fp8,0,0.1218773325284322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,128,1,fp8,fp8,0,0.11358933647473653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,0,1,float16,fp8,0,0.12221866846084595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,128,1,float16,float16,0,0.06923733154932658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,64,0,1,fp8,fp8,0,0.11329600214958191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,0,1,float16,float16,0,0.0702453354994456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,128,1,fp8,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,128,1,float16,fp8,0,0.07082133491834004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,0,1,float16,fp8,0,0.07043733199437459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,64,0,1,fp8,fp8,0,0.0662773350874583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,128,1,float16,float16,0,0.06966933111349742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,0,1,float16,float16,0,0.06870399912198384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,128,1,float16,fp8,0,0.06881066660086314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,0,1,float16,fp8,0,0.06835199892520905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,128,1,fp8,fp8,0,0.06498666604359944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,64,0,1,fp8,fp8,0,0.0664106657107671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,128,1,float16,float16,0,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,0,1,float16,float16,0,0.06866133213043213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,128,1,float16,fp8,0,0.07021866738796234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,128,1,fp8,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,0,1,float16,fp8,0,0.06868266562620799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,64,0,1,fp8,fp8,0,0.06595733265082042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,128,1,float16,float16,0,0.07071466743946075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,0,1,float16,float16,0,0.06909866631031036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,128,1,float16,fp8,0,0.07004799942175548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,128,1,fp8,fp8,0,0.06629866858323415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,0,1,float16,fp8,0,0.06989333530267079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,64,0,1,fp8,fp8,0,0.0654720018307368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,128,1,float16,float16,0,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,0,1,float16,float16,0,0.04294399917125702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,128,1,fp8,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,0,1,fp8,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,128,1,float16,float16,0,0.04171733558177948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,64,0,1,float16,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,0,1,float16,float16,0,0.0421013335386912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,128,1,float16,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,128,1,fp8,fp8,0,0.040218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,0,1,float16,fp8,0,0.04345066845417023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,64,0,1,fp8,fp8,0,0.04073066761096319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,128,1,float16,float16,0,0.04252266883850098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,0,1,float16,float16,0,0.04292266567548116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,128,1,float16,fp8,0,0.04312533140182495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,128,1,fp8,fp8,0,0.040031999349594116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,64,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,128,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,128,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,0,1,float16,fp8,0,0.04419733087221781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,128,1,fp8,fp8,0,0.04033066580692927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,64,0,1,fp8,fp8,0,0.041152000427246094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,128,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,0,1,float16,float16,0,0.030752000709374745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,128,1,float16,fp8,0,0.02792000025510788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,64,0,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,128,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,0,1,float16,float16,0,0.02897600084543228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,128,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,128,1,float16,fp8,0,0.028837333122889202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,0,1,float16,float16,0,0.027701333165168762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,128,1,float16,float16,0,0.028405333558718365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,128,1,float16,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,128,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,64,0,1,float16,fp8,0,0.029978667696317036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,128,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,128,1,fp8,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,128,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,0,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,64,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,0,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,128,1,float16,float16,0,0.022069332500298817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,64,0,1,float16,fp8,0,0.02223466585079829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,128,1,float16,float16,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,128,1,fp8,fp8,0,0.019999999552965164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,128,1,float16,fp8,0,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,0,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,64,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,0,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,128,1,float16,float16,0,0.0206986665725708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,128,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,0,1,float16,fp8,0,0.021850667893886566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,64,0,1,fp8,fp8,0,0.020037333170572918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,0,1,fp8,fp8,0,0.020101333657900494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,64,0,1,float16,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,128,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,128,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,0,1,float16,float16,0,0.017658667018016178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,64,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,0,1,float16,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,64,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,128,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,128,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,64,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,128,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,128,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,0,1,float16,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,128,1,fp8,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,128,1,float16,float16,0,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,128,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,64,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,64,0,1,fp8,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,128,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,0,1,float16,float16,0,0.1976213256518046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,128,1,float16,float16,0,0.19814932346343994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,128,1,float16,fp8,0,0.1957599918047587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,128,1,fp8,fp8,0,0.17750400304794312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,0,1,fp8,fp8,0,0.1788960099220276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,64,0,1,float16,fp8,0,0.1956640084584554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,128,1,float16,float16,0,0.19617066780726114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,0,1,float16,float16,0,0.1958613395690918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,128,1,float16,fp8,0,0.19578667481740317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,128,1,fp8,fp8,0,0.1790026624997457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,0,1,float16,fp8,0,0.1964213252067566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,64,0,1,fp8,fp8,0,0.17739200592041016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,0,1,float16,float16,0,0.19557867447535196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,128,1,float16,float16,0,0.19773866732915243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,128,1,float16,fp8,0,0.19591466585795084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,128,1,fp8,fp8,0,0.17698667446772257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,0,1,float16,fp8,0,0.19662932554880777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,64,0,1,fp8,fp8,0,0.17751999696095785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,128,1,float16,float16,0,0.10610133409500122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,0,0.10524800419807434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,128,1,float16,fp8,0,0.10522666573524475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,128,1,fp8,fp8,0,0.09714133540789287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,0,1,fp8,fp8,0,0.09594133496284485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,0,0.10619200269381206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,128,1,float16,float16,0,0.1050879955291748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,0,1,float16,float16,0,0.10571199655532837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,128,1,float16,fp8,0,0.1051093339920044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,0,1,float16,fp8,0,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,128,1,fp8,fp8,0,0.0958666702111562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,64,0,1,fp8,fp8,0,0.09554133812586467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,128,1,float16,float16,0,0.10521599650382996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,0,1,float16,float16,0,0.10542399684588115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,128,1,float16,fp8,0,0.10558399558067322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,128,1,fp8,fp8,0,0.09541866183280945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,64,0,1,fp8,fp8,0,0.0969493289788564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,128,1,float16,float16,0,0.10537599523862202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,0,1,float16,float16,0,0.10526933272679646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,128,1,float16,fp8,0,0.10563733180363973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,128,1,fp8,fp8,0,0.0974079966545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,0,1,float16,fp8,0,0.10532266894976298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,64,0,1,fp8,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,128,1,float16,float16,0,0.060559997955958046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,0,0.06029866635799408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,128,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,128,1,fp8,fp8,0,0.05648000041643778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,0,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,64,0,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,0,1,float16,float16,0,0.06047999858856201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,128,1,float16,float16,0,0.060378665725390114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,128,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,128,1,fp8,fp8,0,0.05584000051021576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,0,1,float16,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,64,0,1,fp8,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,128,1,float16,float16,0,0.0602453351020813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,128,1,float16,fp8,0,0.06102933486302694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,0,1,float16,float16,0,0.06072533130645752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,128,1,fp8,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,0,1,float16,fp8,0,0.06071466704209646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,64,0,1,fp8,fp8,0,0.05671999851862589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,128,1,float16,float16,0,0.059952000776926674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,0,1,float16,float16,0,0.05997333427270254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,128,1,float16,fp8,0,0.06081599990526835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,128,1,fp8,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,0,1,float16,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,64,0,1,fp8,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,128,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,0,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,0,0.03903999924659729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,64,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,0,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,64,0,1,fp8,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,128,1,float16,float16,0,0.037231999138991036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,0,1,float16,float16,0,0.037765334049860634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,128,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,128,1,fp8,fp8,0,0.0354666660229365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,0,1,float16,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,64,0,1,fp8,fp8,0,0.03598399957021078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,128,1,float16,float16,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,128,1,float16,fp8,0,0.03803733239571253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,128,1,fp8,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,0,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,64,0,1,fp8,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,128,1,float16,float16,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,64,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,128,1,float16,float16,0,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,64,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,128,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,0,1,float16,float16,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,128,1,float16,fp8,0,0.027210667729377747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,128,1,fp8,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,0,1,float16,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,64,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,128,1,float16,float16,0,0.0286613330245018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,0,1,float16,float16,0,0.028245332340399425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,128,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,0,1,float16,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,64,0,1,fp8,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,128,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,128,1,float16,float16,0,0.020319999506076176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,0,1,float16,float16,0,0.0205226664741834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,128,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,128,1,fp8,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,128,1,float16,float16,0,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,0,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,64,0,1,float16,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,128,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,128,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,64,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,128,1,fp8,fp8,0,0.020319999506076176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,0,1,float16,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,64,0,1,fp8,fp8,0,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,0,0.018437333405017853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,128,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,0,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,128,1,float16,float16,0,0.018031999468803406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,128,1,float16,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,64,128,1,fp8,fp8,0,0.018101333330074947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,128,1,float16,float16,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,64,0,1,fp8,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,128,1,fp8,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,128,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,64,0,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,128,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,64,0,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,128,1,float16,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,float16,0,2.042421340942383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,128,1,float16,fp8,0,2.0564053853352866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,128,1,fp8,fp8,0,1.8997066815694172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,float16,0,2.058037281036377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,float16,0,10.655471801757812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,0,1,fp8,fp8,0,9.189893086751303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,64,0,1,float16,fp8,0,10.639216105143229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,128,1,float16,fp8,0,2.074293295542399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,128,1,fp8,fp8,0,1.9153706232706706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,float16,0,2.08569606145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,float16,0,10.66539192199707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,128,1,float16,fp8,0,2.10752534866333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,0,1,fp8,fp8,0,9.205792109171549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,128,1,fp8,fp8,0,1.953829288482666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,64,0,1,float16,fp8,0,10.685680389404297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,float16,0,1.1999786694844563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,float16,0,10.742741902669271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,128,1,float16,fp8,0,1.2285280227661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,128,1,fp8,fp8,0,1.1572693188985188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,float16,0,5.593130747477214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,0,1,fp8,fp8,0,9.230069478352865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,64,0,1,float16,fp8,0,10.753412882486979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,float16,0,1.0618613560994465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,128,1,float16,fp8,0,1.0758612950642903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,0,1,float16,fp8,0,5.63369623819987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,64,0,1,fp8,fp8,0,4.854544003804524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,128,1,fp8,fp8,0,0.9904053211212158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,float16,0,5.4150136311848955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,float16,0,1.0705280303955078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,0,1,float16,fp8,0,5.425221125284831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,64,0,1,fp8,fp8,0,4.675039927164714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,128,1,float16,fp8,0,1.083674669265747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,128,1,fp8,fp8,0,1.00054931640625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,float16,0,1.0856800079345703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,float16,0,5.428245544433594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,128,1,float16,fp8,0,1.098101298014323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,0,1,float16,fp8,0,5.446325302124023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,64,0,1,fp8,fp8,0,4.675008138020833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,128,1,fp8,fp8,0,1.0170880158742268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,float16,0,0.6617279847462972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,float16,0,5.436522801717122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,128,1,float16,fp8,0,0.6779359976450602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,128,1,fp8,fp8,0,0.6458133459091187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,float16,0,2.9080320994059243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,0,1,fp8,fp8,0,4.694426536560059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,64,0,1,float16,fp8,0,5.45307731628418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,float16,0,0.5973173379898071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,128,1,float16,fp8,0,0.603653351465861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,0,1,fp8,fp8,0,2.529424031575521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,64,0,1,float16,fp8,0,2.91649595896403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,128,1,fp8,fp8,0,0.5627040068308512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,float16,0,2.80953057607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,float16,0,0.6010826826095581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,128,1,float16,fp8,0,0.6070506572723389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,0,1,fp8,fp8,0,2.4375999768575034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,64,0,1,float16,fp8,0,2.822335879007975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,128,1,fp8,fp8,0,0.5663413206736246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,float16,0,2.821610768636068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,float16,0,0.6076800028483073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,128,1,float16,fp8,0,0.6159146626790365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,0,1,fp8,fp8,0,2.445861339569092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,64,0,1,float16,fp8,0,2.8207947413126626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,128,1,fp8,fp8,0,0.576042652130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,float16,0,0.4630933205286662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,float16,0,2.825253486633301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,128,1,float16,fp8,0,0.4628373384475708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,0,1,fp8,fp8,0,2.451258659362793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,128,1,fp8,fp8,0,0.4370186726252238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,64,0,1,float16,fp8,0,2.872511863708496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,float16,0,1.6252479553222656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,float16,0,0.459386666615804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,0,1,float16,fp8,0,1.6220533053080242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,64,0,1,fp8,fp8,0,1.417450745900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,128,1,float16,fp8,0,0.46269333362579346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,float16,0,1.6113227208455403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,128,1,fp8,fp8,0,0.4341813325881958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,float16,0,0.46299199263254803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,0,1,fp8,fp8,0,1.4117813110351562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,64,0,1,float16,fp8,0,1.6081387201944988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,128,1,float16,fp8,0,0.4621493419011434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,128,1,fp8,fp8,0,0.4368533293406169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,float16,0,1.6090186436971028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,float16,0,0.46295468012491864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,0,1,fp8,fp8,0,1.4170026779174805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,64,0,1,float16,fp8,0,1.6121759414672852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,128,1,float16,fp8,0,0.46288001537323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,128,1,fp8,fp8,0,0.4373066822687785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,float16,0,1.6081387201944988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,0,1,float16,fp8,0,1.613957405090332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,64,0,1,fp8,fp8,0,1.4125653902689617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,float16,0,1.5143626530965169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,128,1,fp8,fp8,0,1.4049973487854004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,128,1,float16,fp8,0,1.529333273569743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,float16,0,1.5261546770731609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,float16,0,6.337727864583333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,128,1,float16,fp8,0,1.5400373140970867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,0,1,float16,fp8,0,6.3424638112386065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,64,0,1,fp8,fp8,0,5.489402770996094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,128,1,fp8,fp8,0,1.4193545977274578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,float16,0,6.340917587280273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,float16,0,1.5467467308044434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,128,1,float16,fp8,0,1.5632106463114421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,0,1,fp8,fp8,0,5.490842819213867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,64,0,1,float16,fp8,0,6.352575937906901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,128,1,fp8,fp8,0,1.452741305033366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,float16,0,0.9022293090820312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,float16,0,6.389935811360677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,128,1,float16,fp8,0,0.9237279891967773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,128,1,fp8,fp8,0,0.8708639939626058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,float16,0,3.3730347951253257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,0,1,fp8,fp8,0,5.513738632202148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,64,0,1,float16,fp8,0,6.41322644551595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,float16,0,0.800330638885498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,128,1,float16,fp8,0,0.8098186651865641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,0,1,fp8,fp8,0,2.94049072265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,64,0,1,float16,fp8,0,3.4001601537068686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,128,1,fp8,fp8,0,0.7465653419494629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,float16,0,3.241567929585775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,float16,0,0.808351993560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,128,1,float16,fp8,0,0.8161066373189291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,0,1,fp8,fp8,0,2.8099145889282227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,64,0,1,float16,fp8,0,3.2456267674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,128,1,fp8,fp8,0,0.7539520263671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,float16,0,3.24509334564209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,float16,0,0.8168319861094157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,0,1,fp8,fp8,0,2.812704086303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,64,0,1,float16,fp8,0,3.251914660135905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,128,1,float16,fp8,0,0.8266080220540365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,128,1,fp8,fp8,0,0.7662133375803629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,float16,0,3.2591733932495117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,float16,0,0.5007893244425455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,0,1,fp8,fp8,0,2.830597241719564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,64,0,1,float16,fp8,0,3.2752374013264975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,float16,0,1.769781271616618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,128,1,float16,fp8,0,0.51528000831604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,128,1,fp8,fp8,0,0.49139201641082764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,float16,0,0.454367995262146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,0,1,float16,fp8,0,1.780394713083903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,128,1,float16,fp8,0,0.45873598257700604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,64,0,1,fp8,fp8,0,1.5553119977315266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,float16,0,1.699519952138265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,128,1,fp8,fp8,0,0.42741334438323975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,float16,0,0.45740799109141034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,0,1,fp8,fp8,0,1.4855945905049641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,64,0,1,float16,fp8,0,1.7076533635457356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,128,1,float16,fp8,0,0.4622773329416911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,128,1,fp8,fp8,0,0.43110398451487225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,float16,0,1.7074400583902996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,float16,0,0.46171732743581134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,0,1,fp8,fp8,0,1.4863519668579102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,64,0,1,float16,fp8,0,1.7114933331807454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,128,1,float16,fp8,0,0.4676213264465332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,128,1,fp8,fp8,0,0.4371519883473714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,float16,0,1.714197317759196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,float16,0,0.3511039813359578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,0,1,fp8,fp8,0,1.4955466588338215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,128,1,float16,fp8,0,0.35104533036549884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,64,0,1,float16,fp8,0,1.7207627296447754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,float16,0,1.01582932472229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,128,1,fp8,fp8,0,0.33296000957489014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,float16,0,0.35258134206136066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,0,1,fp8,fp8,0,0.8938506444295248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,64,0,1,float16,fp8,0,1.016858657201131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,128,1,float16,fp8,0,0.35155733426411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,128,1,fp8,fp8,0,0.3307466705640157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,float16,0,1.004426638285319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,float16,0,0.3510560194651286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,0,1,float16,fp8,0,1.003322680791219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,64,0,1,fp8,fp8,0,0.8888586362202963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,128,1,float16,fp8,0,0.35123201211293537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,float16,0,1.005669355392456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,128,1,fp8,fp8,0,0.3306399981180827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,float16,0,0.350874662399292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,0,1,float16,fp8,0,1.008032004038493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,64,0,1,fp8,fp8,0,0.8894453048706055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,128,1,float16,fp8,0,0.3513973156611125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,float16,0,1.006650686264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,128,1,fp8,fp8,0,0.33319467306137085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,0,1,float16,fp8,0,1.008501370747884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,64,0,1,fp8,fp8,0,0.8888746897379557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,float16,0,1.2596320311228435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,128,1,fp8,fp8,0,1.165834665298462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,128,1,float16,fp8,0,1.2736533482869465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,float16,0,1.2693226337432861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,float16,0,4.580426534016927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,0,1,float16,fp8,0,4.5904585520426435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,64,0,1,fp8,fp8,0,3.9711786905924478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,128,1,float16,fp8,0,1.280176003774007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,128,1,fp8,fp8,0,1.1785600185394287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,float16,0,4.6009171803792315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,float16,0,1.2867039839426677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,0,1,fp8,fp8,0,3.9855146408081055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,128,1,float16,fp8,0,1.3002506891886394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,64,0,1,float16,fp8,0,4.619135856628418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,128,1,fp8,fp8,0,1.2022346655527751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,float16,0,0.7538666725158691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,float16,0,4.628992080688477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,128,1,float16,fp8,0,0.771562655766805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,0,1,fp8,fp8,0,4.009728113810222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,64,0,1,float16,fp8,0,4.643280029296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,128,1,fp8,fp8,0,0.7270399729410807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,float16,0,2.4737332661946616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,float16,0,0.6702187061309814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,128,1,float16,fp8,0,0.6769226392110189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,0,1,fp8,fp8,0,2.1617066065470376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,64,0,1,float16,fp8,0,2.4901866912841797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,128,1,fp8,fp8,0,0.6257280111312866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,float16,0,2.3550987243652344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,float16,0,0.6760586897532145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,128,1,float16,fp8,0,0.6838400363922119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,0,1,fp8,fp8,0,2.0498719215393066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,64,0,1,float16,fp8,0,2.365642706553141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,128,1,fp8,fp8,0,0.6317760149637858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,float16,0,2.360960006713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,float16,0,0.683130661646525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,0,1,fp8,fp8,0,2.0543039639790854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,64,0,1,float16,fp8,0,2.374021371205648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,128,1,float16,fp8,0,0.691706657409668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,128,1,fp8,fp8,0,0.6417760054270426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,float16,0,2.3753973642985025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,float16,0,0.4206453164418538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,128,1,float16,fp8,0,0.43134931723276776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,0,1,fp8,fp8,0,2.07041072845459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,float16,0,1.3039093017578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,64,0,1,float16,fp8,0,2.3924427032470703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,128,1,fp8,fp8,0,0.4126559893290202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,float16,0,0.38158400853474933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,0,1,float16,fp8,0,1.3175733089447021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,64,0,1,fp8,fp8,0,1.1500266393025715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,128,1,float16,fp8,0,0.3839199940363566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,128,1,fp8,fp8,0,0.3592533270517985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,float16,0,1.2480106353759766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,float16,0,0.3840693235397339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,0,1,fp8,fp8,0,1.0957067012786865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,64,0,1,float16,fp8,0,1.252453327178955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,128,1,float16,fp8,0,0.38650667667388916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,128,1,fp8,fp8,0,0.36191999912261963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,float16,0,1.2550506591796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,float16,0,0.3877919912338257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,0,1,fp8,fp8,0,1.09662930170695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,64,0,1,float16,fp8,0,1.2556906541188557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,128,1,float16,fp8,0,0.394159992535909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,128,1,fp8,fp8,0,0.3677653471628825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,float16,0,1.258639971415202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,float16,0,0.2959679961204529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,0,1,fp8,fp8,0,1.1042826970418294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,64,0,1,float16,fp8,0,1.2650293509165447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,128,1,float16,fp8,0,0.29793065786361694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,float16,0,0.7634560267130533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,128,1,fp8,fp8,0,0.2826133370399475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,0,1,float16,fp8,0,0.7650612990061442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,float16,0,0.2937706708908081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,64,0,1,fp8,fp8,0,0.6758826573689779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,128,1,float16,fp8,0,0.29436800877253216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,128,1,fp8,fp8,0,0.2776586612065633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,float16,0,0.7547199726104736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,float16,0,0.29364800453186035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,0,1,float16,fp8,0,0.754869302113851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,64,0,1,fp8,fp8,0,0.6675733725229899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,128,1,float16,fp8,0,0.29441599051157635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,float16,0,0.7573013305664062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,128,1,fp8,fp8,0,0.27956799666086835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,float16,0,0.29639466603597003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,0,1,float16,fp8,0,0.7569066683451334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,64,0,1,fp8,fp8,0,0.6713439623514811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,float16,0,0.7565333048502604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,128,1,fp8,fp8,0,0.2806826631228129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,128,1,float16,fp8,0,0.295909325281779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,0,1,float16,fp8,0,0.7570827007293701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,64,0,1,fp8,fp8,0,0.6686293284098307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,float16,0,1.9853386878967285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,128,1,fp8,fp8,0,1.8389546076456706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,128,1,float16,fp8,0,1.9986294110616047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,float16,0,2.0023093223571777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,float16,0,6.15550422668457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,0,1,fp8,fp8,0,5.349386850992839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,64,0,1,float16,fp8,0,6.179866790771484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,128,1,float16,fp8,0,2.018517335255941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,128,1,fp8,fp8,0,1.8593813578287761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,float16,0,6.177370707194011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,float16,0,2.031754652659098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,0,1,fp8,fp8,0,5.378314971923828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,64,0,1,float16,fp8,0,6.2062028249104815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,128,1,float16,fp8,0,2.0489652951558432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,128,1,fp8,fp8,0,1.8950613339742024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,float16,0,6.235349019368489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,float16,0,1.1467946370442708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,128,1,float16,fp8,0,1.1734986305236816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,0,1,fp8,fp8,0,5.411098480224609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,128,1,fp8,fp8,0,1.1007466316223145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,64,0,1,float16,fp8,0,6.256074905395508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,float16,0,3.29910945892334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,float16,0,1.0075466632843018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,0,1,fp8,fp8,0,2.90010134379069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,128,1,float16,fp8,0,1.018133322397868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,64,0,1,float16,fp8,0,3.326197306315104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,float16,0,3.1193920771280923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,128,1,fp8,fp8,0,0.9353066285451254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,float16,0,1.0160746574401855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,0,1,fp8,fp8,0,2.708863894144694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,64,0,1,float16,fp8,0,3.1276639302571616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,128,1,float16,fp8,0,1.026810646057129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,128,1,fp8,fp8,0,0.9439840316772461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,float16,0,3.125904083251953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,float16,0,1.0288906892140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,0,1,fp8,fp8,0,2.7197227478027344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,64,0,1,float16,fp8,0,3.140533447265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,128,1,float16,fp8,0,1.0417813460032146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,128,1,fp8,fp8,0,0.9606346289316813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,float16,0,3.1467787424723306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,float16,0,0.605679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,128,1,float16,fp8,0,0.6219253142674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,0,1,fp8,fp8,0,2.737834612528483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,float16,0,1.7001973787943523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,64,0,1,float16,fp8,0,3.1653706232706704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,128,1,fp8,fp8,0,0.5866506497065226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,float16,0,0.5399946769078573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,128,1,float16,fp8,0,0.5444693168004354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,0,1,fp8,fp8,0,1.5037066141764324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,64,0,1,float16,fp8,0,1.7177866299947102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,float16,0,1.613983949025472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,128,1,fp8,fp8,0,0.5043946504592896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,float16,0,0.5417759815851847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,0,1,fp8,fp8,0,1.410213311513265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,128,1,float16,fp8,0,0.5477173328399658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,64,0,1,float16,fp8,0,1.6205760637919109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,float16,0,1.6161279678344727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,128,1,fp8,fp8,0,0.506938656171163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,float16,0,0.5496160189310709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,0,1,float16,fp8,0,1.6246347427368164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,64,0,1,fp8,fp8,0,1.412559986114502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,128,1,float16,fp8,0,0.5565973520278931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,128,1,fp8,fp8,0,0.5167946815490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,float16,0,1.6315199534098308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,float16,0,0.3396586577097575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,0,1,float16,fp8,0,1.639418601989746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,64,0,1,fp8,fp8,0,1.4222346941630046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,float16,0,0.9093066851298014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,128,1,float16,fp8,0,0.3479199806849162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,128,1,fp8,fp8,0,0.33241599798202515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,float16,0,0.30424533287684125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,0,1,float16,fp8,0,0.9184426466623942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,64,0,1,fp8,fp8,0,0.8105279604593912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,128,1,float16,fp8,0,0.30826665957768756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,128,1,fp8,fp8,0,0.2900533278783162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,float16,0,0.8606346448262533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,float16,0,0.30613867441813153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,0,1,float16,fp8,0,0.8659626642862955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,64,0,1,fp8,fp8,0,0.7611253261566162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,128,1,float16,fp8,0,0.31039466460545856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,128,1,fp8,fp8,0,0.2925706704457601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,float16,0,0.8688159783681234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,float16,0,0.31251200040181476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,0,1,float16,fp8,0,0.8701600233713785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,64,0,1,fp8,fp8,0,0.7661546866099039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,128,1,float16,fp8,0,0.3154826760292053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,128,1,fp8,fp8,0,0.2981333335240682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,float16,0,0.8756799697875977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,float16,0,0.24225066105524698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,0,1,float16,fp8,0,0.8775839805603027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,64,0,1,fp8,fp8,0,0.768986701965332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,float16,0,0.5475626786549886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,128,1,float16,fp8,0,0.24064000447591147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,128,1,fp8,fp8,0,0.22875199715296426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,float16,0,0.23838400840759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,0,1,fp8,fp8,0,0.48556800683339435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,64,0,1,float16,fp8,0,0.5473599831263224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,128,1,float16,fp8,0,0.2385866641998291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,float16,0,0.5369866689046224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,128,1,fp8,fp8,0,0.22660799821217856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,float16,0,0.24060799678166708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,0,1,float16,fp8,0,0.536298672358195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,64,0,1,fp8,fp8,0,0.4778933525085449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,float16,0,0.5362720092137655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,128,1,fp8,fp8,0,0.22683199246724448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,128,1,float16,fp8,0,0.2388533353805542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,0,1,float16,fp8,0,0.5388853152592977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,float16,0,0.24060267210006714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,64,0,1,fp8,fp8,0,0.47992531458536786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,float16,0,0.5386559963226318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,128,1,float16,fp8,0,0.24080532789230347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,128,1,fp8,fp8,0,0.22844266891479492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,0,1,float16,fp8,0,0.5402773221333822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,64,0,1,fp8,fp8,0,0.4819360176722209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,float16,0,1.4720053672790527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,128,1,float16,fp8,0,1.4842346509297688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,128,1,fp8,fp8,0,1.3612106641133626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,float16,0,3.7733866373697915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,float16,0,1.4835573832194011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,0,1,float16,fp8,0,3.778437296549479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,64,0,1,fp8,fp8,0,3.285834630330404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,128,1,float16,fp8,0,1.4988212585449219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,128,1,fp8,fp8,0,1.3735520044962566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,float16,0,3.787813186645508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,float16,0,1.5044053395589192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,0,1,fp8,fp8,0,3.304346720377604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,64,0,1,float16,fp8,0,3.8026132583618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,128,1,float16,fp8,0,1.5209546089172363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,128,1,fp8,fp8,0,1.4041120211283367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,float16,0,3.8215999603271484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,float16,0,0.8618613084157308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,128,1,float16,fp8,0,0.8823520342508951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,0,1,float16,fp8,0,3.837130546569824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,float16,0,2.061786651611328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,64,0,1,fp8,fp8,0,3.3333441416422525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,128,1,fp8,fp8,0,0.8282079696655273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,float16,0,0.7582826614379883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,0,1,float16,fp8,0,2.0851146380106607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,64,0,1,fp8,fp8,0,1.825178623199463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,128,1,float16,fp8,0,0.7665599981943766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,float16,0,1.9270505905151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,128,1,fp8,fp8,0,0.7062400182088217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,float16,0,0.7669653097788492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,0,1,fp8,fp8,0,1.6783307393391926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,64,0,1,float16,fp8,0,1.932773272196452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,128,1,float16,fp8,0,0.7752052942911783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,float16,0,1.93230406443278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,128,1,fp8,fp8,0,0.7115039825439453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,float16,0,0.7748426596323649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,0,1,float16,fp8,0,1.9430294036865234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,64,0,1,fp8,fp8,0,1.690933386484782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,128,1,float16,fp8,0,0.7856853008270264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,float16,0,1.9468159675598145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,128,1,fp8,fp8,0,0.7241493066151937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,float16,0,0.4588906764984131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,0,1,float16,fp8,0,1.9594079653422039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,64,0,1,fp8,fp8,0,1.7052159309387207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,float16,0,1.0730026563008626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,128,1,float16,fp8,0,0.47225598494211835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,128,1,fp8,fp8,0,0.4437280098597209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,float16,0,0.4082560141881307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,0,1,float16,fp8,0,1.0852479934692383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,64,0,1,fp8,fp8,0,0.956442674001058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,128,1,float16,fp8,0,0.41233599185943604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,128,1,fp8,fp8,0,0.3838293155034383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,float16,0,1.0045119921366374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,float16,0,0.4105600118637085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,0,1,float16,fp8,0,1.0073973337809246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,64,0,1,fp8,fp8,0,0.8841493129730225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,128,1,float16,fp8,0,0.41817601521809894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,128,1,fp8,fp8,0,0.38631999492645264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,float16,0,1.0102826754252117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,float16,0,0.41673600673675537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,0,1,float16,fp8,0,1.0136106808980305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,64,0,1,fp8,fp8,0,0.8883893489837646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,128,1,float16,fp8,0,0.42151467005411786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,128,1,fp8,fp8,0,0.3944106499354045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,float16,0,1.0177013079325359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,float16,0,0.2598399917284648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,float16,0,0.5824000040690104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,128,1,float16,fp8,0,0.26761066913604736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,0,1,fp8,fp8,0,0.8952319622039795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,64,0,1,float16,fp8,0,1.0226826667785645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,128,1,fp8,fp8,0,0.25489600499471027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,float16,0,0.23054399092992148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,0,1,fp8,fp8,0,0.5226346651713053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,64,0,1,float16,fp8,0,0.5905919869740804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,128,1,float16,fp8,0,0.23332266012827554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,float16,0,0.5442293485005697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,128,1,fp8,fp8,0,0.22033600012461343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,0,1,float16,fp8,0,0.5464426676432291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,float16,0,0.2323840061823527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,64,0,1,fp8,fp8,0,0.48396801948547363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,float16,0,0.5494453509648641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,128,1,fp8,fp8,0,0.22427199284235635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,128,1,float16,fp8,0,0.23493333657582602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,0,1,float16,fp8,0,0.5505760113398234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,float16,0,0.2364586591720581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,64,0,1,fp8,fp8,0,0.48867201805114746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,float16,0,0.5523253281911215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,128,1,float16,fp8,0,0.2405760089556376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,128,1,fp8,fp8,0,0.22611733277638754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,float16,0,0.18677333990732828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,0,1,float16,fp8,0,0.5550026496251425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,float16,0,0.36309866110483807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,64,0,1,fp8,fp8,0,0.4928586483001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,128,1,float16,fp8,0,0.18600533405939737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,128,1,fp8,fp8,0,0.17692800362904867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,0,1,float16,fp8,0,0.36324799060821533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,float16,0,0.18322134017944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,64,0,1,fp8,fp8,0,0.32495999336242676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,float16,0,0.3534239927927653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,128,1,float16,fp8,0,0.18357867002487183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,128,1,fp8,fp8,0,0.1741173267364502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,0,1,float16,fp8,0,0.3550826708475749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,64,0,1,fp8,fp8,0,0.3163839975992839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,float16,0,0.18314133087793985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,float16,0,0.353274663289388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,128,1,float16,fp8,0,0.18346667289733887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,128,1,fp8,fp8,0,0.17452800273895264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,0,1,float16,fp8,0,0.354912002881368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,64,0,1,fp8,fp8,0,0.3163359959920247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,float16,0,0.18367467323939005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,float16,0,0.35490667819976807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,128,1,float16,fp8,0,0.18445332845052084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,128,1,fp8,fp8,0,0.17491199572881064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,0,1,float16,fp8,0,0.3562399943669637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,64,0,1,fp8,fp8,0,0.31834133466084796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,float16,0,1.9523733456929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,128,1,fp8,fp8,0,1.8067092895507812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,128,1,float16,fp8,0,1.96998929977417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,float16,0,3.8811893463134766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,float16,0,1.9709866841634114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,0,1,fp8,fp8,0,3.4049228032430015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,64,0,1,float16,fp8,0,3.8943894704182944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,128,1,float16,fp8,0,1.9872746467590332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,128,1,fp8,fp8,0,1.825503985087077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,float16,0,3.9039732615152993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,0,1,float16,fp8,0,3.9184961318969727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,float16,0,2.0005173683166504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,64,0,1,fp8,fp8,0,3.4344800313313804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,float16,0,3.946928024291992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,128,1,fp8,fp8,0,1.8600746790568035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,128,1,float16,fp8,0,2.0187253952026367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,float16,0,1.115594704945882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,float16,0,2.150458653767904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,0,1,float16,fp8,0,3.9649438858032227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,128,1,float16,fp8,0,1.1393493016560872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,64,0,1,fp8,fp8,0,3.4752480189005532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,128,1,fp8,fp8,0,1.0694986979166667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,float16,0,0.9795200030008951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,0,1,fp8,fp8,0,1.9074133237202961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,64,0,1,float16,fp8,0,2.1751413345336914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,float16,0,1.951269308725993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,128,1,float16,fp8,0,0.9894453684488932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,128,1,fp8,fp8,0,0.905290683110555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,0,1,float16,fp8,0,1.9617600440979004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,float16,0,0.9887999693552653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,64,0,1,fp8,fp8,0,1.7148693402608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,128,1,float16,fp8,0,0.9965759913126627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,128,1,fp8,fp8,0,0.9133546352386475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,float16,0,1.9641653696695964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,float16,0,1.0031306743621826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,0,1,float16,fp8,0,1.9700746536254883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,64,0,1,fp8,fp8,0,1.725861390431722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,128,1,float16,fp8,0,1.0123733679453533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,128,1,fp8,fp8,0,0.9322026570638021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,float16,0,1.9851840337117512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,float16,0,0.57915198802948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,0,1,fp8,fp8,0,1.7469546000162761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,64,0,1,float16,fp8,0,1.9928480784098308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,128,1,float16,fp8,0,0.5934773286183676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,float16,0,1.1066666444142659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,128,1,fp8,fp8,0,0.5577013492584229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,0,1,float16,fp8,0,1.1184053421020508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,float16,0,0.5121386845906576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,64,0,1,fp8,fp8,0,0.9849706490834554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,float16,0,1.0068853696187336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,128,1,float16,fp8,0,0.5168693463007609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,128,1,fp8,fp8,0,0.4763466517130534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,float16,0,0.5150826772054037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,0,1,float16,fp8,0,1.0132426420847576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,64,0,1,fp8,fp8,0,0.8894293308258057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,float16,0,1.0121973355611165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,128,1,float16,fp8,0,0.519045352935791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,128,1,fp8,fp8,0,0.4809173345565796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,0,1,float16,fp8,0,1.0182773272196453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,float16,0,0.5216000080108643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,64,0,1,fp8,fp8,0,0.8956747055053711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,float16,0,1.0216106573740642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,128,1,fp8,fp8,0,0.4886933167775472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,128,1,float16,fp8,0,0.529146671295166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,float16,0,0.31113600730895996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,0,1,float16,fp8,0,1.029647986094157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,float16,0,0.5845066706339518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,64,0,1,fp8,fp8,0,0.9049066702524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,128,1,float16,fp8,0,0.31891733407974243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,128,1,fp8,fp8,0,0.30188800891240436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,float16,0,0.2733760078748067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,0,1,float16,fp8,0,0.5925600131352743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,64,0,1,fp8,fp8,0,0.5251306692759196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,128,1,float16,fp8,0,0.27611200014750165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,float16,0,0.5318453311920166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,128,1,fp8,fp8,0,0.260917325814565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,float16,0,0.27619733413060504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,0,1,float16,fp8,0,0.534986654917399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,64,0,1,fp8,fp8,0,0.4768853187561035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,float16,0,0.5358239809672037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,128,1,float16,fp8,0,0.2804960012435913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,128,1,fp8,fp8,0,0.26344533761342365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,0,1,float16,fp8,0,0.5391093492507935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,64,0,1,fp8,fp8,0,0.4803520043690999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,float16,0,0.2821813424428304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,float16,0,0.5427626768747965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,128,1,float16,fp8,0,0.28522666295369464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,128,1,fp8,fp8,0,0.2690826654434204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,0,1,float16,fp8,0,0.5465759833653768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,float16,0,0.18050666650136313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,float16,0,0.3248746593793233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,64,0,1,fp8,fp8,0,0.48481067021687824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,128,1,float16,fp8,0,0.18445332845052084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,128,1,fp8,fp8,0,0.1771519978841146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,0,1,float16,fp8,0,0.3306559920310974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,float16,0,0.15828800201416016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,64,0,1,fp8,fp8,0,0.296015997727712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,float16,0,0.29605333010355633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,128,1,float16,fp8,0,0.1572426656881968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,128,1,fp8,fp8,0,0.15153599778811136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,0,1,fp8,fp8,0,0.2667466600735982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,64,0,1,float16,fp8,0,0.2965493400891622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,float16,0,0.1567466656366984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,float16,0,0.2962026596069336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,128,1,float16,fp8,0,0.15964266657829285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,128,1,fp8,fp8,0,0.15415466825167337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,0,1,float16,fp8,0,0.2971946597099304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,64,0,1,fp8,fp8,0,0.268832008043925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,float16,0,0.16056000192960104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,128,1,float16,fp8,0,0.16288000345230103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,float16,0,0.30008532603581745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,128,1,fp8,fp8,0,0.15632533033688864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,0,1,float16,fp8,0,0.3022613326708476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,float16,0,0.13030399878819784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,64,0,1,fp8,fp8,0,0.27349867423375446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,float16,0,0.20826667547225952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,128,1,float16,fp8,0,0.13079466422398886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,128,1,fp8,fp8,0,0.12426132957140605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,0,1,float16,fp8,0,0.20933866500854492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,64,0,1,fp8,fp8,0,0.18941332896550497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,float16,0,0.12796266873677573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,128,1,float16,fp8,0,0.1276853382587433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,128,1,fp8,fp8,0,0.12070932984352112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,float16,0,0.20566932360331217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,0,1,float16,fp8,0,0.20534400145212808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,float16,0,0.127920001745224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,64,0,1,fp8,fp8,0,0.1852160096168518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,float16,0,0.2064853310585022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,128,1,float16,fp8,0,0.12811199824015299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,128,1,fp8,fp8,0,0.1216213305791219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,0,1,fp8,fp8,0,0.1860640048980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,64,0,1,float16,fp8,0,0.20642133553822836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,float16,0,0.12774933377901712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,128,1,float16,fp8,0,0.1281599998474121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,float16,0,0.20658665895462036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,128,1,fp8,fp8,0,0.1213759978612264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,0,1,float16,fp8,0,0.20721065998077393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,64,0,1,fp8,fp8,0,0.18594666322072348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,float16,0,1.4489599863688152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,128,1,float16,fp8,0,1.4607092539469402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,float16,0,2.467199961344401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,128,1,fp8,fp8,0,1.3373653093973796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,0,1,float16,fp8,0,2.4830986658732095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,float16,0,1.4569066365559895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,64,0,1,fp8,fp8,0,2.1820480028788247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,128,1,float16,fp8,0,1.4709653854370117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,128,1,fp8,fp8,0,1.3513706525166829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,float16,0,2.487071990966797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,float16,0,1.4831306139628093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,0,1,float16,fp8,0,2.496394634246826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,64,0,1,fp8,fp8,0,2.1988159815470376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,float16,0,2.5145813624064126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,128,1,fp8,fp8,0,1.3799360593159993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,128,1,float16,fp8,0,1.4966506958007812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,float16,0,0.8406506379445394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,0,1,float16,fp8,0,2.527440071105957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,float16,0,1.385818640391032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,128,1,float16,fp8,0,0.85807998975118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,64,0,1,fp8,fp8,0,2.234528064727783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,128,1,fp8,fp8,0,0.8057119846343994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,0,1,float16,fp8,0,1.4054826100667317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,float16,0,0.739349365234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,64,0,1,fp8,fp8,0,1.2546239693959553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,float16,0,1.2568106651306152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,128,1,float16,fp8,0,0.7452106475830078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,128,1,fp8,fp8,0,0.683690627415975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,float16,0,0.7441120147705078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,0,1,float16,fp8,0,1.263797362645467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,64,0,1,fp8,fp8,0,1.1129173437754314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,float16,0,1.2642666498819988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,128,1,fp8,fp8,0,0.6897599697113037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,128,1,float16,fp8,0,0.7521386941274008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,0,1,float16,fp8,0,1.270133336385091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,float16,0,0.7531999746958414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,64,0,1,fp8,fp8,0,1.1204640070597331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,float16,0,1.2794559796651204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,128,1,float16,fp8,0,0.7642827033996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,128,1,fp8,fp8,0,0.7019893328348795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,float16,0,0.4392586549123128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,0,1,float16,fp8,0,1.2871413230895996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,float16,0,0.7194666862487793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,64,0,1,fp8,fp8,0,1.134383996327718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,128,1,float16,fp8,0,0.45020798842112225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,128,1,fp8,fp8,0,0.4240320126215617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,float16,0,0.38604267438252765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,0,1,fp8,fp8,0,0.6535573403040568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,64,0,1,float16,fp8,0,0.7298933664957682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,float16,0,0.6530239979426066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,128,1,float16,fp8,0,0.39060266812642414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,128,1,fp8,fp8,0,0.3621866703033447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,0,1,float16,fp8,0,0.656607985496521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,float16,0,0.3900800148646037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,64,0,1,fp8,fp8,0,0.5832213163375854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,float16,0,0.6542133490244547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,128,1,float16,fp8,0,0.39555732409159344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,128,1,fp8,fp8,0,0.3649173180262248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,float16,0,0.3960426648457845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,0,1,float16,fp8,0,0.6612053314844767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,64,0,1,fp8,fp8,0,0.5868533452351888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,float16,0,0.6624159812927246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,128,1,float16,fp8,0,0.40113600095113117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,128,1,fp8,fp8,0,0.3723413149515788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,0,1,float16,fp8,0,0.6674346923828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,float16,0,0.23831466833750406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,64,0,1,fp8,fp8,0,0.5936053196589152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,float16,0,0.386677344640096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,128,1,float16,fp8,0,0.2425546646118164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,128,1,fp8,fp8,0,0.2323573430379232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,0,1,float16,fp8,0,0.39246400197347003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,float16,0,0.20777599016825357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,64,0,1,fp8,fp8,0,0.35336001714070636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,float16,0,0.3473866780598958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,128,1,fp8,fp8,0,0.19985065857569376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,128,1,float16,fp8,0,0.21011734008789062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,0,1,float16,fp8,0,0.3492639859517415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,64,0,1,fp8,fp8,0,0.31601067384084064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,float16,0,0.20999467372894287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,float16,0,0.34929601351420086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,128,1,fp8,fp8,0,0.20165866613388062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,128,1,float16,fp8,0,0.21248000860214233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,0,1,float16,fp8,0,0.35254931449890137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,64,0,1,fp8,fp8,0,0.3182400067647298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,float16,0,0.21454399824142456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,float16,0,0.3551093339920044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,128,1,float16,fp8,0,0.21791466077168783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,128,1,fp8,fp8,0,0.2058080037434896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,0,1,float16,fp8,0,0.3593706687291463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,float16,0,0.13818666338920593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,64,0,1,fp8,fp8,0,0.32306132713953656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,128,1,float16,fp8,0,0.1426346699396769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,float16,0,0.21997332572937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,128,1,fp8,fp8,0,0.1365493337313334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,0,1,float16,fp8,0,0.22258667151133218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,64,0,1,fp8,fp8,0,0.2017973264058431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,float16,0,0.12202133735020955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,128,1,float16,fp8,0,0.12156800429026286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,float16,0,0.19749865929285684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,128,1,fp8,fp8,0,0.1143839955329895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,0,1,float16,fp8,0,0.19764800866444907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,64,0,1,fp8,fp8,0,0.17869865894317627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,float16,0,0.12210667133331299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,128,1,float16,fp8,0,0.12205333511034648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,float16,0,0.19729065895080566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,128,1,fp8,fp8,0,0.11659199992815654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,0,1,float16,fp8,0,0.19952533642450967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,64,0,1,fp8,fp8,0,0.1791306734085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,float16,0,0.12230400244394939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,128,1,float16,fp8,0,0.12379733721415202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,float16,0,0.2004693349202474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,128,1,fp8,fp8,0,0.1186240017414093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,0,1,float16,fp8,0,0.20195732514063516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,64,0,1,fp8,fp8,0,0.18370133638381958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,float16,0,0.10169066985448201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,float16,0,0.1463093360265096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,128,1,float16,fp8,0,0.10122666756312053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,128,1,fp8,fp8,0,0.09780800342559814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,0,1,float16,fp8,0,0.14461333552996317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,64,0,1,fp8,fp8,0,0.13385599851608276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,float16,0,0.09960533181826274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,128,1,float16,fp8,0,0.09939733147621155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,float16,0,0.14422399799029031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,128,1,fp8,fp8,0,0.09558932979901631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,0,1,fp8,fp8,0,0.13186132907867432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,64,0,1,float16,fp8,0,0.14433599511782327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,float16,0,0.09964266419410706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,128,1,float16,fp8,0,0.09934932986895244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,float16,0,0.14429866274197897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,128,1,fp8,fp8,0,0.09537600477536519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,0,1,fp8,fp8,0,0.1320480008920034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,64,0,1,float16,fp8,0,0.14443199833234152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,float16,0,0.1002506713072459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,128,1,float16,fp8,0,0.10103467106819153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,float16,0,0.14443733294804892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,128,1,fp8,fp8,0,0.09499733646710713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,0,1,float16,fp8,0,0.14565333724021912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,64,0,1,fp8,fp8,0,0.13250133395195007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,float16,0,1.9437120755513508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,128,1,float16,fp8,0,1.9540425936381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,float16,0,2.731034596761068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,128,1,fp8,fp8,0,1.77237335840861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,0,1,fp8,fp8,0,2.4200746218363443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,64,0,1,float16,fp8,0,2.748384157816569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,float16,0,1.9772586822509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,float16,0,2.7671947479248047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,128,1,fp8,fp8,0,1.7943040529886882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,128,1,float16,fp8,0,1.977765401204427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,0,1,float16,fp8,0,2.7698132197062173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,float16,0,1.9949280420939128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,64,0,1,fp8,fp8,0,2.442410628000895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,float16,0,2.799738566080729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,128,1,float16,fp8,0,2.0064427057902017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,128,1,fp8,fp8,0,1.8358133633931477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,float16,0,1.1044853528340657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,0,1,float16,fp8,0,2.8108107248942056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,64,0,1,fp8,fp8,0,2.4885120391845703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,128,1,float16,fp8,0,1.1229920387268066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,float16,0,1.5371200243632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,128,1,fp8,fp8,0,1.0566240151723225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,float16,0,0.9650719960530599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,0,1,fp8,fp8,0,1.4070933659871419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,64,0,1,float16,fp8,0,1.557205359141032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,128,1,float16,fp8,0,0.9751839637756348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,float16,0,1.365898609161377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,128,1,fp8,fp8,0,0.8892800013224283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,0,1,float16,fp8,0,1.3758880297342937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,float16,0,0.9744319915771484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,64,0,1,fp8,fp8,0,1.216373364130656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,float16,0,1.3733065923055012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,128,1,float16,fp8,0,0.985146681467692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,128,1,fp8,fp8,0,0.8996053536732992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,0,1,float16,fp8,0,1.3852640787760417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,64,0,1,fp8,fp8,0,1.2258453369140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,float16,0,0.9922239780426025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,float16,0,1.396458625793457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,128,1,float16,fp8,0,1.0013279914855957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,128,1,fp8,fp8,0,0.9200106461842855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,float16,0,0.564682682355245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,0,1,float16,fp8,0,1.4064745903015137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,float16,0,0.7879306475321451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,64,0,1,fp8,fp8,0,1.250501314798991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,128,1,float16,fp8,0,0.577461322148641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,128,1,fp8,fp8,0,0.5436426798502604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,0,1,float16,fp8,0,0.799295981725057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,64,0,1,fp8,fp8,0,0.7220853169759115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,float16,0,0.4959520101547241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,float16,0,0.7017652988433838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,128,1,float16,fp8,0,0.5017760197321574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,128,1,fp8,fp8,0,0.4622346560160319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,0,1,float16,fp8,0,0.703978697458903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,float16,0,0.501360019048055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,64,0,1,fp8,fp8,0,0.6280320088068644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,float16,0,0.7068320115407308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,128,1,float16,fp8,0,0.5065066814422607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,128,1,fp8,fp8,0,0.4660693407058716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,0,1,float16,fp8,0,0.7114986578623453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,float16,0,0.5107466777165731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,64,0,1,fp8,fp8,0,0.6324853499730428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,128,1,fp8,fp8,0,0.4742293357849121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,128,1,float16,fp8,0,0.5165866613388062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,float16,0,0.7158880233764648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,float16,0,0.2978559931119283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,0,1,fp8,fp8,0,0.6445866823196411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,64,0,1,float16,fp8,0,0.7211999893188477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,float16,0,0.4130133390426636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,128,1,float16,fp8,0,0.3044373393058777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,128,1,fp8,fp8,0,0.28957333167394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,0,1,float16,fp8,0,0.4210826555887858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,float16,0,0.2597600022951762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,64,0,1,fp8,fp8,0,0.38095466295878094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,float16,0,0.3662453492482503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,128,1,float16,fp8,0,0.2625173330307007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,128,1,fp8,fp8,0,0.24700266122817993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,0,1,float16,fp8,0,0.36848533153533936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,64,0,1,fp8,fp8,0,0.3330133358637492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,float16,0,0.2643839915593465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,128,1,fp8,fp8,0,0.25010132789611816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,128,1,float16,fp8,0,0.2669173280398051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,float16,0,0.3699093262354533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,0,1,fp8,fp8,0,0.33876800537109375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,float16,0,0.2691520055135091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,64,0,1,float16,fp8,0,0.37251734733581543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,128,1,fp8,fp8,0,0.25411200523376465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,128,1,float16,fp8,0,0.2716853419939677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,float16,0,0.37748265266418457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,float16,0,0.16456533471743265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,0,1,fp8,fp8,0,0.3428373336791992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,64,0,1,float16,fp8,0,0.379311998685201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,128,1,float16,fp8,0,0.16879467169443765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,float16,0,0.2286400000254313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,128,1,fp8,fp8,0,0.16057067116101584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,0,1,float16,fp8,0,0.2305013338724772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,64,0,1,fp8,fp8,0,0.21174399058024088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,float16,0,0.1402666668097178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,float16,0,0.19619733095169067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,128,1,float16,fp8,0,0.14220800002415976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,128,1,fp8,fp8,0,0.13492266337076822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,float16,0,0.14265066385269165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,0,1,fp8,fp8,0,0.1811573306719462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,64,0,1,float16,fp8,0,0.19826666514078775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,float16,0,0.19944000244140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,128,1,float16,fp8,0,0.14432533582051596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,128,1,fp8,fp8,0,0.13825066884358725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,0,1,float16,fp8,0,0.1998400092124939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,64,0,1,fp8,fp8,0,0.18602667252222696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,float16,0,0.14437333742777506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,float16,0,0.20191999276479086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,128,1,float16,fp8,0,0.14789332946141562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,128,1,fp8,fp8,0,0.14245866735776266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,0,1,float16,fp8,0,0.20382400353749594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,64,0,1,fp8,fp8,0,0.18981866041819254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,float16,0,0.09501866499582927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,float16,0,0.13011200229326883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,128,1,float16,fp8,0,0.09918399651845296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,128,1,fp8,fp8,0,0.0981280008951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,0,1,float16,fp8,0,0.13243200381596884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,64,0,1,fp8,fp8,0,0.12576533357302347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,float16,0,0.08686400453249614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,float16,0,0.11999467015266418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,128,1,float16,fp8,0,0.08710400263468425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,128,1,fp8,fp8,0,0.08237333099047343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,0,1,float16,fp8,0,0.11990933616956075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,64,0,1,fp8,fp8,0,0.10733333230018616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,float16,0,0.08729066451390584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,128,1,float16,fp8,0,0.08873066306114197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,float16,0,0.11956800023714702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,128,1,fp8,fp8,0,0.08085866769154866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,float16,0,0.08889599641164143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,0,1,fp8,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,64,0,1,float16,fp8,0,0.12011200189590454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,float16,0,0.11966400345166524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,128,1,float16,fp8,0,0.0885706643263499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,128,1,fp8,fp8,0,0.08269866804281871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,0,1,float16,fp8,0,0.12054933110872905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,64,0,1,fp8,fp8,0,0.1092800001303355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,float16,0,0.07246933380762736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,float16,0,0.09107200304667155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,128,1,fp8,fp8,0,0.06825600067774455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,128,1,float16,fp8,0,0.07251200079917908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,0,1,float16,fp8,0,0.09304533402125041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,64,0,1,fp8,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,float16,0,0.07247466842333476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,float16,0,0.09245333075523376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,128,1,float16,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,128,1,fp8,fp8,0,0.06949866811434428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,0,1,float16,fp8,0,0.09103999535242717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,64,0,1,fp8,fp8,0,0.08532266815503438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,float16,0,0.07222400108973186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,float16,0,0.09121066331863403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,128,1,float16,fp8,0,0.07254933317502339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,128,1,fp8,fp8,0,0.06909866631031036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,0,1,float16,fp8,0,0.09124267101287842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,64,0,1,fp8,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,float16,0,0.07285333176453908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,float16,0,0.09116266171137492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,128,1,float16,fp8,0,0.07278400162855785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,128,1,fp8,fp8,0,0.06863466898600261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,0,1,float16,fp8,0,0.09116266171137492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,64,0,1,fp8,fp8,0,0.08522666494051616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,float16,0,1.4451360702514648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,128,1,float16,fp8,0,1.4525334040323894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,float16,0,1.8300426801045735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,128,1,fp8,fp8,0,1.3231946627298992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,0,1,float16,fp8,0,1.8388800621032715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,64,0,1,fp8,fp8,0,1.6349120140075684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,float16,0,1.4616533915201824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,128,1,float16,fp8,0,1.470234711964925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,float16,0,1.846501350402832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,128,1,fp8,fp8,0,1.3390080134073894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,0,1,fp8,fp8,0,1.6524532636006672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,float16,0,1.4789387385050456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,64,0,1,float16,fp8,0,1.8574825922648113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,float16,0,1.874106725056966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,128,1,float16,fp8,0,1.4924853642781575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,128,1,fp8,fp8,0,1.3622825940450032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,0,1,float16,fp8,0,1.882047971089681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,float16,0,0.8319520155588785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,float16,0,1.045423984527588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,64,0,1,fp8,fp8,0,1.6774293581644695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,128,1,float16,fp8,0,0.8468906879425049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,128,1,fp8,fp8,0,0.7963519891103109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,0,1,fp8,fp8,0,0.9679146607716879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,64,0,1,float16,fp8,0,1.0648319721221924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,float16,0,0.7278772989908854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,float16,0,0.9258933067321777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,128,1,fp8,fp8,0,0.6706079641977946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,128,1,float16,fp8,0,0.7354773680369059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,0,1,float16,fp8,0,0.9291573365529379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,64,0,1,fp8,fp8,0,0.8286773363749186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,float16,0,0.7341600259145101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,float16,0,0.9316159884134928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,128,1,fp8,fp8,0,0.678277333577474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,128,1,float16,fp8,0,0.7421440283457438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,0,1,float16,fp8,0,0.936784029006958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,64,0,1,fp8,fp8,0,0.837615966796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,float16,0,0.744645357131958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,float16,0,0.943290630976359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,128,1,float16,fp8,0,0.7529280185699463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,128,1,fp8,fp8,0,0.6919466654459635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,float16,0,0.42875198523203534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,float16,0,0.5396480162938436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,0,1,float16,fp8,0,0.953925371170044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,64,0,1,fp8,fp8,0,0.8521386782328287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,128,1,float16,fp8,0,0.4376480182011922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,128,1,fp8,fp8,0,0.4105600118637085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,0,1,float16,fp8,0,0.550597349802653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,float16,0,0.3744373321533203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,64,0,1,fp8,fp8,0,0.5011839866638184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,float16,0,0.4734933376312256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,128,1,float16,fp8,0,0.37960533301035565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,128,1,fp8,fp8,0,0.3508373498916626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,0,1,float16,fp8,0,0.47786664962768555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,64,0,1,fp8,fp8,0,0.43104533354441327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,float16,0,0.38016533851623535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,float16,0,0.47988800207773846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,128,1,float16,fp8,0,0.38386134306589764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,128,1,fp8,fp8,0,0.3553280035654704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,0,1,float16,fp8,0,0.4853866497675578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,64,0,1,fp8,fp8,0,0.4359360138575236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,float16,0,0.3853280146916707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,float16,0,0.4869973262151082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,128,1,float16,fp8,0,0.39046398798624676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,128,1,fp8,fp8,0,0.3604960044225057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,float16,0,0.22769065697987875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,0,1,float16,fp8,0,0.4928319851557414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,64,0,1,fp8,fp8,0,0.44152534008026123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,float16,0,0.2876799901326497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,128,1,float16,fp8,0,0.23427200317382812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,128,1,fp8,fp8,0,0.22169599930445352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,0,1,float16,fp8,0,0.29208532969156903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,64,0,1,fp8,fp8,0,0.26828267176946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,float16,0,0.1961066722869873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,128,1,float16,fp8,0,0.19828800360361734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,float16,0,0.2477333347002665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,128,1,fp8,fp8,0,0.18759999672571817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,0,1,float16,fp8,0,0.2504799962043762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,64,0,1,fp8,fp8,0,0.2309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,float16,0,0.1978773276011149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,float16,0,0.2508053382237752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,128,1,float16,fp8,0,0.20096000035603842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,128,1,fp8,fp8,0,0.19139732917149863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,0,1,float16,fp8,0,0.25542400280634564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,64,0,1,fp8,fp8,0,0.233514666557312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,float16,0,0.20333333810170492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,float16,0,0.25727466742197674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,128,1,fp8,fp8,0,0.1954773267110189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,128,1,float16,fp8,0,0.2067413330078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,0,1,float16,fp8,0,0.2598186731338501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,float16,0,0.12804266810417175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,64,0,1,fp8,fp8,0,0.23783467213312784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,float16,0,0.15982932845751444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,128,1,float16,fp8,0,0.12993066509564719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,128,1,fp8,fp8,0,0.1251626710096995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,0,1,float16,fp8,0,0.16242133577664694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,float16,0,0.10825066765149434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,64,0,1,fp8,fp8,0,0.1500746707121531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,float16,0,0.13818132877349854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,128,1,float16,fp8,0,0.10944533348083496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,128,1,fp8,fp8,0,0.1011893351872762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,0,1,float16,fp8,0,0.1393013298511505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,float16,0,0.10958400368690491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,64,0,1,fp8,fp8,0,0.12595733006795248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,float16,0,0.1381439963976542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,128,1,float16,fp8,0,0.11162133018175761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,128,1,fp8,fp8,0,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,0,1,float16,fp8,0,0.13926933209101358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,float16,0,0.1113759974638621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,64,0,1,fp8,fp8,0,0.12661866346995035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,float16,0,0.13912000258763632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,128,1,float16,fp8,0,0.11341333389282227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,128,1,fp8,fp8,0,0.10785599549611409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,0,1,float16,fp8,0,0.14146666725476584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,float16,0,0.073594664533933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,64,0,1,fp8,fp8,0,0.13133866588274637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,float16,0,0.0925333301226298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,128,1,float16,fp8,0,0.07634133100509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,128,1,fp8,fp8,0,0.07458666463692983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,0,1,float16,fp8,0,0.09427733222643535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,64,0,1,fp8,fp8,0,0.08929066856702168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,float16,0,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,128,1,float16,fp8,0,0.0705973356962204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,float16,0,0.08619200189908345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,128,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,0,1,float16,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,64,0,1,fp8,fp8,0,0.07889066636562347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,float16,0,0.06861866513888042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,128,1,float16,fp8,0,0.07071466743946075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,float16,0,0.08700799942016602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,128,1,fp8,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,0,1,float16,fp8,0,0.0869706670443217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,float16,0,0.07069333394368489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,64,0,1,fp8,fp8,0,0.08060266574223836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,float16,0,0.08700799942016602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,128,1,fp8,fp8,0,0.06669866542021434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,128,1,float16,fp8,0,0.07047466437021892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,float16,0,0.056202664971351624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,0,1,float16,fp8,0,0.08896000186602275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,64,0,1,fp8,fp8,0,0.08081600069999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,128,1,float16,fp8,0,0.05797333518664042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,float16,0,0.07036800185839336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,128,1,fp8,fp8,0,0.05431999762852987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,0,1,float16,fp8,0,0.07060799996058147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,64,0,1,fp8,fp8,0,0.06651199857393901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,float16,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,128,1,float16,fp8,0,0.0581279993057251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,128,1,fp8,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,float16,0,0.07048533360163371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,0,1,float16,fp8,0,0.070783997575442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,float16,0,0.05609600245952606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,64,0,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,128,1,float16,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,float16,0,0.07046400010585785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,128,1,fp8,fp8,0,0.05418666700522105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,0,1,float16,fp8,0,0.07047999898592631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,float16,0,0.05645333230495453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,64,0,1,fp8,fp8,0,0.06599999964237213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,float16,0,0.07025066514809926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,128,1,float16,fp8,0,0.056890666484832764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,128,1,fp8,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,0,1,float16,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,64,0,1,fp8,fp8,0,0.06458133459091187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,float16,0,1.6912693977355957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,128,1,float16,fp8,0,1.6858399709065754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,float16,0,1.9341600735982258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,128,1,fp8,fp8,0,1.6148160298665364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,0,1,float16,fp8,0,1.9310399691263835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,64,0,1,fp8,fp8,0,1.7964000701904297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,float16,0,1.7000373204549153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,float16,0,1.9410187403361003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,128,1,float16,fp8,0,1.6949599583943684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,128,1,fp8,fp8,0,1.633344014485677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,0,1,float16,fp8,0,1.9340960184733074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,64,0,1,fp8,fp8,0,1.8205973307291667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,float16,0,1.7681600252787273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,float16,0,2.0009652773539224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,128,1,float16,fp8,0,1.7417653401692708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,128,1,fp8,fp8,0,1.7113919258117676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,float16,0,0.9334987004597982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,float16,0,1.0748693148295085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,0,1,float16,fp8,0,1.9631147384643555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,64,0,1,fp8,fp8,0,1.8979466756184895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,128,1,float16,fp8,0,0.912997325261434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,128,1,fp8,fp8,0,0.9195786317189535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,0,1,float16,fp8,0,1.0570186773935955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,64,0,1,fp8,fp8,0,1.0297760168711345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,float16,0,0.8538880348205566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,float16,0,0.977135976155599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,128,1,float16,fp8,0,0.8529173533121744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,128,1,fp8,fp8,0,0.8118133544921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,0,1,float16,fp8,0,0.9775146643320719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,64,0,1,fp8,fp8,0,0.9054400126139323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,float16,0,0.8592267036437988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,float16,0,0.9811840057373047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,128,1,float16,fp8,0,0.8575519720713297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,128,1,fp8,fp8,0,0.8180267016092936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,0,1,float16,fp8,0,0.9819573561350504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,float16,0,0.8642293612162272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,64,0,1,fp8,fp8,0,0.9138879776000977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,float16,0,0.9937493006388346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,128,1,fp8,fp8,0,0.8412533601125082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,128,1,float16,fp8,0,0.8611520131429037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,float16,0,0.4774826765060425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,0,1,float16,fp8,0,0.9892799854278564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,64,0,1,fp8,fp8,0,0.9440053304036459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,128,1,float16,fp8,0,0.46898667017618817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,float16,0,0.553050676981608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,128,1,fp8,fp8,0,0.46911998589833576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,0,1,float16,fp8,0,0.5421173175175985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,64,0,1,fp8,fp8,0,0.5232213338216146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,float16,0,0.43617598215738934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,128,1,float16,fp8,0,0.4366399844487508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,float16,0,0.49757333596547443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,128,1,fp8,fp8,0,0.41767998536427814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,0,1,float16,fp8,0,0.4991146723429362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,float16,0,0.4400746822357178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,64,0,1,fp8,fp8,0,0.4633493423461914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,float16,0,0.5038133462270101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,128,1,float16,fp8,0,0.43963201840718585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,128,1,fp8,fp8,0,0.42028268178304035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,0,1,float16,fp8,0,0.501141349474589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,64,0,1,fp8,fp8,0,0.4686719973882039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,float16,0,0.44413332144419354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,float16,0,0.5068480173746744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,128,1,float16,fp8,0,0.44071467717488605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,128,1,fp8,fp8,0,0.4249546527862549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,float16,0,0.24794665972391763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,0,1,float16,fp8,0,0.5052693287531534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,64,0,1,fp8,fp8,0,0.4718240102132161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,float16,0,0.2874133388201396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,128,1,float16,fp8,0,0.24314665794372559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,128,1,fp8,fp8,0,0.24454933404922485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,0,1,float16,fp8,0,0.28406399488449097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,64,0,1,fp8,fp8,0,0.27369600534439087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,float16,0,0.22791999578475952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,128,1,float16,fp8,0,0.22800532976786295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,float16,0,0.2606773376464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,128,1,fp8,fp8,0,0.2177706758181254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,0,1,float16,fp8,0,0.2593653400739034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,64,0,1,fp8,fp8,0,0.24355733394622803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,float16,0,0.23060266176859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,float16,0,0.26258667310078937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,128,1,float16,fp8,0,0.22842667500178018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,128,1,fp8,fp8,0,0.22224533557891846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,0,1,fp8,fp8,0,0.2493120034535726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,float16,0,0.23072532812754312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,64,0,1,float16,fp8,0,0.26216532786687213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,float16,0,0.2640480001767476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,128,1,fp8,fp8,0,0.22323733568191528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,128,1,float16,fp8,0,0.23240532477696738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,0,1,float16,fp8,0,0.265173335870107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,64,0,1,fp8,fp8,0,0.2488373319307963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,float16,0,0.13620799779891968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,float16,0,0.1569813291231791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,128,1,float16,fp8,0,0.13471466302871704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,128,1,fp8,fp8,0,0.13532267014185587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,0,1,float16,fp8,0,0.15495466192563376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,64,0,1,fp8,fp8,0,0.15053866306940714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,float16,0,0.1379200021425883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,float16,0,0.12104533116022746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,128,1,float16,fp8,0,0.12256532907485962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,128,1,fp8,fp8,0,0.11767466862996419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,0,1,float16,fp8,0,0.1389173368612925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,64,0,1,fp8,fp8,0,0.13054399689038595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,float16,0,0.12239467104276021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,float16,0,0.13945600390434265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,128,1,float16,fp8,0,0.12229333321253459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,0,1,float16,fp8,0,0.14013866583506265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,0,1,fp8,fp8,0,0.1325546701749166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,64,128,1,fp8,fp8,0,0.12008532881736755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,float16,0,0.1241386632124583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,float16,0,0.14111999670664468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,128,1,float16,fp8,0,0.12458667159080505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,128,1,fp8,fp8,0,0.12307199835777283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,0,1,float16,fp8,0,0.1422826647758484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,float16,0,0.07523199915885925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,64,0,1,fp8,fp8,0,0.13496533036231995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,128,1,float16,fp8,0,0.073253333568573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,128,1,fp8,fp8,0,0.07823466757933299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,fp8,0,0.08582933743794759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,0,1,float16,float16,0,0.08633599678675334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,64,0,1,fp8,fp8,0,0.08478933572769165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,float16,0,0.07006933291753133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,float16,0,0.07992533346017201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,128,1,float16,fp8,0,0.06950399776299794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,128,1,fp8,fp8,0,0.06607466439406078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,0,1,float16,fp8,0,0.07880533238252004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,64,0,1,fp8,fp8,0,0.07414400080839793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,float16,0,0.07892266909281413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,float16,0,0.06946133573849995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,128,1,float16,fp8,0,0.06960533559322357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,128,1,fp8,fp8,0,0.06446933249632518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,0,1,fp8,fp8,0,0.0739519993464152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,64,0,1,float16,fp8,0,0.08005866905053456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,float16,0,0.06900266806284587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,float16,0,0.0807360013326009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,128,1,fp8,fp8,0,0.0647680014371872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,128,1,float16,fp8,0,0.0698880006869634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,0,1,float16,fp8,0,0.07930666704972585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,64,0,1,fp8,fp8,0,0.07357866565386455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,float16,0,0.04649066428343455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,128,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,0,1,float16,fp8,0,0.049600000182787575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,128,1,fp8,fp8,0,0.04446933170159658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,64,0,1,fp8,fp8,0,0.04782933493455251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,float16,0,0.044138665000597634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,128,1,float16,fp8,0,0.04394666850566864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,float16,0,0.04823466638724009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,128,1,fp8,fp8,0,0.042133331298828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,0,1,float16,fp8,0,0.04905066887537638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,64,0,1,fp8,fp8,0,0.04568533102671305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,float16,0,0.04554133117198944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,float16,0,0.04764799773693085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,128,1,fp8,fp8,0,0.042277331153551735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,128,1,float16,fp8,0,0.044682666659355164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,0,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,64,0,1,fp8,fp8,0,0.04600533346335093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,float16,0,0.04458666841189066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,float16,0,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,128,1,float16,fp8,0,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,0,1,float16,fp8,0,0.04807466765244802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,128,1,fp8,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,64,0,1,fp8,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,float16,0,0.03366400053103765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,float16,0,0.0373333344856898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,128,1,float16,fp8,0,0.03297599901755651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,128,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,0,1,float16,fp8,0,0.03639466563860575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,64,0,1,fp8,fp8,0,0.03391999999682108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,128,1,float16,fp8,0,0.032586666444937386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,128,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,0,1,float16,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,float16,0,0.03176533430814743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,64,0,1,fp8,fp8,0,0.03402666747570038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,float16,0,0.03537066777547201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,128,1,float16,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,128,1,fp8,fp8,0,0.03028800090154012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,64,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,float16,0,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,128,1,float16,fp8,0,0.03299200038115183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,float16,0,0.035743998984495796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,128,1,fp8,fp8,0,0.030095999439557392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,0,1,float16,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,64,0,1,fp8,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,128,1,float16,float16,0,1.643290678660075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,0,1,float16,float16,0,1.648538589477539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,128,1,float16,fp8,0,1.639861265818278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,0,1,fp8,fp8,0,1.5754772822062175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,128,1,fp8,fp8,0,1.5908106168111165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,64,0,1,float16,fp8,0,1.6449119249979656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,128,1,float16,float16,0,1.6470293998718262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,0,1,float16,float16,0,1.6570666631062825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,128,1,fp8,fp8,0,1.586751937866211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,128,1,float16,fp8,0,1.6465546290079753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,0,1,float16,fp8,0,1.6486825942993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,64,0,1,fp8,fp8,0,1.5790613492329915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,128,1,float16,float16,0,1.7300426165262859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,0,1,float16,float16,0,1.7308212916056316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,128,1,float16,fp8,0,1.6930826505025227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,128,1,float16,float16,0,0.9071786403656006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,128,1,fp8,fp8,0,1.6703519821166992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,0,1,float16,fp8,0,1.7332746187845867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,64,0,1,fp8,fp8,0,1.6539039611816406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,128,1,float16,fp8,0,0.8900960286458334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,0,1,float16,float16,0,0.9222666422526041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,128,1,fp8,fp8,0,0.9021120071411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,0,1,float16,fp8,0,0.9028800328572592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,128,1,float16,float16,0,0.8270506858825684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,64,0,1,fp8,fp8,0,0.9075946807861328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,0,1,float16,float16,0,0.8332853317260742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,128,1,float16,fp8,0,0.8277280330657959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,128,1,fp8,fp8,0,0.7877333164215088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,0,1,float16,fp8,0,0.832090695699056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,64,0,1,fp8,fp8,0,0.7822453180948893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,128,1,float16,float16,0,0.834928035736084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,0,1,float16,float16,0,0.8355627059936523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,128,1,fp8,fp8,0,0.799845298131307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,128,1,float16,fp8,0,0.8326186339060465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,0,1,float16,fp8,0,0.8372320334116617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,64,0,1,fp8,fp8,0,0.7892906665802002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,128,1,float16,float16,0,0.8444746335347494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,0,1,float16,float16,0,0.8458826541900635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,128,1,float16,fp8,0,0.8393386999766032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,128,1,fp8,fp8,0,0.8171199957529703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,128,1,float16,float16,0,0.46752532323201496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,0,1,float16,fp8,0,0.8443733056386312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,64,0,1,fp8,fp8,0,0.8092586994171143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,128,1,float16,fp8,0,0.4575146834055583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,128,1,fp8,fp8,0,0.45715733369191486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,0,1,float16,float16,0,0.473632017771403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,0,1,float16,fp8,0,0.4639413356781006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,128,1,float16,float16,0,0.4238933324813843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,64,0,1,fp8,fp8,0,0.45746131738026935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,128,1,fp8,fp8,0,0.406490683555603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,0,1,float16,float16,0,0.42441598574320477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,128,1,float16,fp8,0,0.4235466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,0,1,fp8,fp8,0,0.4015573263168335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,64,0,1,float16,fp8,0,0.4251893361409505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,128,1,float16,float16,0,0.4288426637649536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,128,1,float16,fp8,0,0.42657601833343506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,0,1,float16,float16,0,0.4295733372370402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,128,1,fp8,fp8,0,0.4098879893620809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,0,1,fp8,fp8,0,0.40754131476084393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,64,0,1,float16,fp8,0,0.42884798844655353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,128,1,float16,float16,0,0.43219733238220215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,128,1,float16,fp8,0,0.42973331610361737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,0,1,float16,float16,0,0.4337386687596639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,128,1,fp8,fp8,0,0.41410664717356366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,128,1,float16,float16,0,0.24200532833735147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,0,1,fp8,fp8,0,0.4112906853357951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,64,0,1,float16,fp8,0,0.43275201320648193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,0,1,float16,float16,0,0.24522133668263754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,128,1,float16,fp8,0,0.23753599325815836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,128,1,fp8,fp8,0,0.24068800608317056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,0,1,float16,fp8,0,0.24243199825286865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,128,1,float16,float16,0,0.22074133157730103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,64,0,1,fp8,fp8,0,0.23867199818293253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,0,1,float16,float16,0,0.22229333718617758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,128,1,fp8,fp8,0,0.21155200401941934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,128,1,float16,fp8,0,0.22054932514826456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,0,1,float16,fp8,0,0.22076267004013062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,64,0,1,fp8,fp8,0,0.20957332849502563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,128,1,float16,float16,0,0.22386133670806885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,128,1,float16,fp8,0,0.22369599342346191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,0,1,float16,fp8,0,0.224671999613444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,128,1,fp8,fp8,0,0.21543999512990317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,0,1,float16,float16,0,0.22476800282796225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,64,0,1,fp8,fp8,0,0.2146986722946167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,128,1,float16,float16,0,0.2261120080947876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,0,1,float16,float16,0,0.22593067089716592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,128,1,float16,fp8,0,0.22477332750956217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,128,1,fp8,fp8,0,0.21709867318471274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,0,1,float16,fp8,0,0.22686932484308878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,64,0,1,fp8,fp8,0,0.2152000069618225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,128,1,float16,float16,0,0.13218667109807333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,0,1,float16,float16,0,0.13404800494511923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,128,1,float16,fp8,0,0.13012267152468363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,128,1,fp8,fp8,0,0.1325759987036387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,0,1,float16,fp8,0,0.13369599978129068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,64,0,1,fp8,fp8,0,0.1329813301563263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,128,1,float16,float16,0,0.1176479955514272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,0,1,float16,float16,0,0.11891200145085652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,128,1,float16,fp8,0,0.11877333124478658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,128,1,fp8,fp8,0,0.11436800161997478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,0,1,float16,fp8,0,0.11892267068227132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,64,0,1,fp8,fp8,0,0.11395200093587239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,0,1,float16,float16,0,0.11989333232243855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,128,1,float16,float16,0,0.11991999546686809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,128,1,float16,fp8,0,0.11967466274897258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,128,1,fp8,fp8,0,0.11665067076683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,0,1,float16,fp8,0,0.11956800023714702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,64,0,1,fp8,fp8,0,0.11584533254305522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,128,1,float16,float16,0,0.12011733651161194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,128,1,float16,fp8,0,0.12035733461380005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,0,1,float16,float16,0,0.12060800194740295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,128,1,fp8,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,0,1,float16,fp8,0,0.12045333782831828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,64,0,1,fp8,fp8,0,0.11708266536394756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,128,1,float16,float16,0,0.07294933497905731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,0,1,float16,float16,0,0.07255466779073079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,128,1,float16,fp8,0,0.07245866457621257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,128,1,fp8,fp8,0,0.07494399944941203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,0,1,fp8,fp8,0,0.07470400134722392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,128,1,float16,float16,0,0.06720533470312755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,64,0,1,float16,fp8,0,0.07259200016657512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,0,1,float16,float16,0,0.0665280024210612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,128,1,float16,fp8,0,0.06795733173688252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,128,1,fp8,fp8,0,0.06405866642793019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,0,1,float16,fp8,0,0.06668266654014587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,64,0,1,fp8,fp8,0,0.06312533219655354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,128,1,float16,float16,0,0.06739733119805653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,0,1,float16,float16,0,0.06865066786607106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,128,1,float16,fp8,0,0.0688213308652242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,128,1,fp8,fp8,0,0.06448000172773997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,0,1,float16,fp8,0,0.06679466863473256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,64,0,1,fp8,fp8,0,0.0634933312733968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,128,1,float16,float16,0,0.0687253326177597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,0,1,float16,float16,0,0.06857066849867503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,128,1,float16,fp8,0,0.06816533207893372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,128,1,fp8,fp8,0,0.06471999982992808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,0,1,fp8,fp8,0,0.06414933502674103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,64,0,1,float16,fp8,0,0.06696000198523204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,128,1,float16,float16,0,0.04351999859015147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,0,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,128,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,0,1,float16,fp8,0,0.04378133515516917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,128,1,fp8,fp8,0,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,64,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,128,1,float16,float16,0,0.0435146689414978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,0,1,float16,float16,0,0.04203199843565623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,128,1,float16,fp8,0,0.0432640016078949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,128,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,0,1,float16,fp8,0,0.0422026664018631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,64,0,1,fp8,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,128,1,float16,float16,0,0.043738668163617454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,0,1,float16,float16,0,0.042821332812309265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,128,1,float16,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,128,1,fp8,fp8,0,0.040762667854626976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,0,1,float16,fp8,0,0.042549331982930504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,64,0,1,fp8,fp8,0,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,128,1,float16,float16,0,0.04275199770927429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,128,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,128,1,fp8,fp8,0,0.04062933226426443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,0,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,128,1,float16,float16,0,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,64,0,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,0,1,float16,float16,0,0.031925333042939506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,128,1,float16,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,128,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,0,1,float16,fp8,0,0.032229334115982056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,64,0,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,128,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,0,1,float16,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,128,1,fp8,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,64,0,1,fp8,fp8,0,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,128,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,0,1,float16,float16,0,0.03201599915822347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,128,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,128,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,0,1,fp8,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,64,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,128,1,float16,fp8,0,0.0322826678554217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,128,1,fp8,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,0,1,float16,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,64,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,128,1,float16,float16,0,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,0,1,float16,float16,0,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,128,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,64,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,128,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,0,1,float16,float16,0,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,128,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,64,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,128,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,128,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,0,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,64,0,1,fp8,fp8,0,0.022842665513356526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,128,1,float16,float16,0,0.7652959823608398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,0,1,float16,float16,0,0.7473119894663492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,128,1,float16,fp8,0,0.7631359895070394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,128,1,fp8,fp8,0,0.7179466883341471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,0,1,fp8,fp8,0,0.6942773660024008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,64,0,1,float16,fp8,0,0.7461919784545898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,128,1,float16,float16,0,0.770581324895223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,0,1,float16,float16,0,0.7530879974365234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,128,1,float16,fp8,0,0.7688906987508138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,128,1,fp8,fp8,0,0.7312053044637045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,0,1,float16,fp8,0,0.7504746913909912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,64,0,1,fp8,fp8,0,0.7071893215179443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,128,1,float16,float16,0,0.777786652247111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,0,1,float16,float16,0,0.7635146776835123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,128,1,float16,fp8,0,0.7724160353342692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,128,1,fp8,fp8,0,0.7711146672566732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,0,1,float16,fp8,0,0.7552479902903239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,64,0,1,fp8,fp8,0,0.7469387054443359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,128,1,float16,float16,0,0.43461867173512775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,0,1,float16,float16,0,0.42853331565856934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,128,1,float16,fp8,0,0.42532801628112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,128,1,fp8,fp8,0,0.42635734875996906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,0,1,float16,fp8,0,0.4158666531244914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,64,0,1,fp8,fp8,0,0.4145333369572957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,128,1,float16,float16,0,0.38810133934020996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,0,1,float16,float16,0,0.3803146680196126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,128,1,float16,fp8,0,0.3877386649449666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,128,1,fp8,fp8,0,0.36934932072957355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,0,1,fp8,fp8,0,0.3585493167241414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,64,0,1,float16,fp8,0,0.379258672396342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,0,1,float16,float16,0,0.38261866569519043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,128,1,float16,float16,0,0.39273067315419513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,128,1,float16,fp8,0,0.39188798268636066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,128,1,fp8,fp8,0,0.37488532066345215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,0,1,float16,fp8,0,0.38313066959381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,64,0,1,fp8,fp8,0,0.3662506739298503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,128,1,float16,float16,0,0.3966133197148641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,0,1,float16,float16,0,0.3878399928410848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,128,1,float16,fp8,0,0.39603734016418457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,128,1,fp8,fp8,0,0.3795093297958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,0,1,float16,fp8,0,0.38648533821105957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,64,0,1,fp8,fp8,0,0.3685973485310872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,128,1,float16,float16,0,0.2249173323313395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,0,1,float16,float16,0,0.22027732928593954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,128,1,float16,fp8,0,0.22194133202234903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,0,1,float16,fp8,0,0.21610132853190103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,128,1,fp8,fp8,0,0.22268799940745035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,64,0,1,fp8,fp8,0,0.21751999855041504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,128,1,float16,float16,0,0.2017013430595398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,0,1,float16,float16,0,0.1976213256518046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,128,1,float16,fp8,0,0.20181334018707275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,0,1,float16,fp8,0,0.19783467054367065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,128,1,fp8,fp8,0,0.19248000780741373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,64,0,1,fp8,fp8,0,0.18777066469192505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,0,1,float16,float16,0,0.20002132654190063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,128,1,float16,float16,0,0.2054133415222168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,128,1,float16,fp8,0,0.20463999112447104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,0,1,float16,fp8,0,0.19936533768971762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,128,1,fp8,fp8,0,0.1995519995689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,64,0,1,fp8,fp8,0,0.19309866428375244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,0,1,float16,float16,0,0.20193066199620566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,128,1,float16,float16,0,0.20639467239379883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,128,1,float16,fp8,0,0.20729066928227743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,128,1,fp8,fp8,0,0.19911466042200723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,0,1,float16,fp8,0,0.20173333088556925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,64,0,1,fp8,fp8,0,0.1946293314297994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,128,1,float16,float16,0,0.12435733278592427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,0,1,float16,float16,0,0.12180266777674358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,128,1,float16,fp8,0,0.12311466534932454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,0,1,float16,fp8,0,0.11997866630554199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,128,1,fp8,fp8,0,0.12462400396664937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,64,0,1,fp8,fp8,0,0.12059733271598816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,128,1,float16,float16,0,0.11077866951624553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,0,1,float16,float16,0,0.10831466317176819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,128,1,float16,fp8,0,0.10967466235160828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,128,1,fp8,fp8,0,0.1051573355992635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,0,1,float16,fp8,0,0.10789333780606587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,64,0,1,fp8,fp8,0,0.10175466537475586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,128,1,float16,float16,0,0.11158933242162068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,0,1,float16,float16,0,0.10794132947921753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,128,1,float16,fp8,0,0.11179199814796448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,128,1,fp8,fp8,0,0.10763733585675557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,0,1,float16,fp8,0,0.1076800028483073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,64,0,1,fp8,fp8,0,0.10333866874376933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,128,1,float16,float16,0,0.1132586697737376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,0,1,float16,float16,0,0.10970667004585266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,128,1,fp8,fp8,0,0.10925333698590596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,128,1,float16,fp8,0,0.11421866218249004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,0,1,float16,fp8,0,0.10946133732795715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,64,0,1,fp8,fp8,0,0.10576533277829488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,128,1,float16,float16,0,0.06950399776299794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,0,1,float16,float16,0,0.06814933319886525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,128,1,fp8,fp8,0,0.07309866448243459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,128,1,float16,fp8,0,0.07009066641330719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,0,1,float16,fp8,0,0.06704000135262807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,64,0,1,fp8,fp8,0,0.07016533116499583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,128,1,float16,float16,0,0.0646613339583079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,128,1,float16,fp8,0,0.06477333108584087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,128,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,0,1,float16,fp8,0,0.06329600016276042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,0,1,float16,float16,0,0.0632479985555013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,64,0,1,fp8,fp8,0,0.05900266766548157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,128,1,float16,float16,0,0.06435733536879222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,0,1,float16,float16,0,0.06393066545327504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,128,1,float16,fp8,0,0.06533333162466685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,128,1,fp8,fp8,0,0.06159999966621399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,0,1,float16,fp8,0,0.0634933312733968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,64,0,1,fp8,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,0,1,float16,float16,0,0.06382933259010315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,128,1,float16,fp8,0,0.06467199822266896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,128,1,fp8,fp8,0,0.061205332477887474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,128,1,float16,float16,0,0.06541333099206288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,0,1,float16,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,64,0,1,fp8,fp8,0,0.06006933252016703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,128,1,float16,float16,0,0.042837331692377724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,0,1,float16,float16,0,0.04295999805132548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,128,1,float16,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,128,1,fp8,fp8,0,0.04120533416668574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,0,1,float16,fp8,0,0.043151999513308205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,128,1,float16,float16,0,0.040976000328858696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,0,1,float16,float16,0,0.03947199881076813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,128,1,float16,fp8,0,0.040218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,64,0,1,fp8,fp8,0,0.040821333726247154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,0,1,float16,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,128,1,fp8,fp8,0,0.03882666677236557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,64,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,128,1,float16,float16,0,0.04278400043646494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,128,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,0,1,float16,float16,0,0.03945599993069967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,128,1,fp8,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,0,1,float16,fp8,0,0.04043200115362803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,64,0,1,fp8,fp8,0,0.03914133210976919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,128,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,128,1,float16,fp8,0,0.04147200038035711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,128,1,fp8,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,0,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,0,1,float16,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,64,0,1,fp8,fp8,0,0.03781333317359289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,128,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,0,1,float16,float16,0,0.03035199890534083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,128,1,float16,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,128,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,0,1,float16,fp8,0,0.031146667897701263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,64,0,1,fp8,fp8,0,0.029232000311215717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,128,1,float16,float16,0,0.029946667452653248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,0,1,float16,float16,0,0.029792000850041706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,128,1,float16,fp8,0,0.030000001192092896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,128,1,fp8,fp8,0,0.028938665986061096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,0,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,64,0,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,128,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,128,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,64,128,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,128,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,128,1,float16,fp8,0,0.030048000315825146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,128,1,fp8,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,0,1,float16,fp8,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,0,1,float16,float16,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,128,1,float16,fp8,0,0.023813332120577495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,128,1,fp8,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,128,1,float16,float16,0,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,0,1,float16,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,0,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,64,0,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,128,1,float16,float16,0,0.022986667851607006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,0,1,float16,float16,0,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,128,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,64,0,1,float16,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,128,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,128,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,64,0,1,fp8,fp8,0,0.018698666244745255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,128,1,float16,float16,0,0.019733333339293797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,128,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,64,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,128,1,float16,float16,0,0.01977066695690155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,128,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,128,1,fp8,fp8,0,0.017808000246683758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,128,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,0,1,float16,float16,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,64,0,1,fp8,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,128,1,float16,float16,0,0.40649600823720294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,0,1,float16,float16,0,0.4082719882329305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,128,1,float16,fp8,0,0.4068959951400757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,128,1,fp8,fp8,0,0.38762132326761883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,0,1,float16,fp8,0,0.4059679905573527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,64,0,1,fp8,fp8,0,0.38817067941029865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,128,1,float16,float16,0,0.4121119976043701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,0,1,float16,float16,0,0.4109813372294108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,128,1,float16,fp8,0,0.40908265113830566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,128,1,fp8,fp8,0,0.3943626483281453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,0,1,float16,fp8,0,0.40885865688323975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,64,0,1,fp8,fp8,0,0.395957350730896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,128,1,float16,float16,0,0.41502400239308673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,0,1,float16,float16,0,0.4163946708043416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,128,1,float16,fp8,0,0.4137386480967204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,128,1,fp8,fp8,0,0.4016266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,0,1,float16,fp8,0,0.4137493371963501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,128,1,float16,float16,0,0.23402667045593262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,0,1,float16,float16,0,0.2326080004374186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,64,0,1,fp8,fp8,0,0.40035200119018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,128,1,float16,fp8,0,0.23108800252278647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,128,1,fp8,fp8,0,0.2326026757558187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,0,1,float16,fp8,0,0.2295573353767395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,64,0,1,fp8,fp8,0,0.2338506579399109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,128,1,float16,float16,0,0.2104640007019043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,0,1,float16,float16,0,0.21192532777786255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,128,1,float16,fp8,0,0.2117919921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,128,1,fp8,fp8,0,0.203658660252889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,0,1,float16,fp8,0,0.21182399988174438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,64,0,1,fp8,fp8,0,0.2023680011431376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,0,1,float16,float16,0,0.21412799755732217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,128,1,float16,float16,0,0.21356266736984253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,128,1,float16,fp8,0,0.21424533923467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,128,1,fp8,fp8,0,0.21082667509714761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,0,1,float16,fp8,0,0.21434666713078818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,64,0,1,fp8,fp8,0,0.20880534251530966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,128,1,float16,float16,0,0.21601066986719766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,0,1,float16,float16,0,0.2161440054575602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,128,1,float16,fp8,0,0.21541867653528848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,128,1,fp8,fp8,0,0.20857600371042886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,0,1,float16,fp8,0,0.21532267332077026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,64,0,1,fp8,fp8,0,0.20836800336837769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,128,1,float16,float16,0,0.12691199779510498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,0,1,float16,float16,0,0.12777066230773926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,128,1,fp8,fp8,0,0.12739200393358865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,128,1,float16,fp8,0,0.12422399719556172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,0,1,float16,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,64,0,1,fp8,fp8,0,0.127402663230896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,128,1,float16,float16,0,0.1139413317044576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,128,1,fp8,fp8,0,0.10778666536013286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,0,1,float16,float16,0,0.1143946647644043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,128,1,float16,fp8,0,0.11331199606259663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,0,1,float16,fp8,0,0.11397332946459453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,128,1,float16,float16,0,0.1149120032787323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,64,0,1,fp8,fp8,0,0.10806399583816528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,128,1,float16,fp8,0,0.11556266744931538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,0,1,float16,float16,0,0.11508267124493916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,128,1,fp8,fp8,0,0.10962133606274922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,0,1,float16,fp8,0,0.11458133657773335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,64,0,1,fp8,fp8,0,0.10955199599266052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,128,1,float16,float16,0,0.11533866326014201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,0,1,float16,float16,0,0.11530133088429768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,128,1,float16,fp8,0,0.11540266871452332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,128,1,fp8,fp8,0,0.11326932907104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,0,1,float16,fp8,0,0.11528000235557556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,64,0,1,fp8,fp8,0,0.1127359966437022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,128,1,float16,float16,0,0.07039466500282288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,0,1,float16,float16,0,0.07030400137106578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,128,1,float16,fp8,0,0.06991466879844666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,128,1,fp8,fp8,0,0.07261866827805837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,0,1,float16,fp8,0,0.06922133266925812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,64,0,1,fp8,fp8,0,0.07247999807198842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,128,1,float16,fp8,0,0.06413866579532623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,128,1,float16,float16,0,0.06442666550477345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,128,1,fp8,fp8,0,0.06039999922116598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,0,1,float16,float16,0,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,0,1,float16,fp8,0,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,128,1,float16,float16,0,0.06422399977842967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,64,0,1,fp8,fp8,0,0.060453335444132485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,0,1,float16,float16,0,0.06479466458161671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,128,1,float16,fp8,0,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,128,1,fp8,fp8,0,0.06029866635799408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,0,1,float16,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,128,1,float16,float16,0,0.06412266691525777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,0,1,float16,float16,0,0.06522666911284129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,128,1,fp8,fp8,0,0.06076266864935557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,128,1,float16,fp8,0,0.06502933303515117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,0,1,float16,fp8,0,0.06489066779613495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,64,0,1,fp8,fp8,0,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,128,1,float16,float16,0,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,0,1,float16,float16,0,0.04215466479460398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,128,1,float16,fp8,0,0.04144533226887385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,128,1,fp8,fp8,0,0.041450666884581246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,0,1,float16,fp8,0,0.040565334260463715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,64,0,1,fp8,fp8,0,0.04030400017897288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,128,1,float16,float16,0,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,0,1,float16,float16,0,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,128,1,float16,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,128,1,fp8,fp8,0,0.03937600056330363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,0,1,float16,fp8,0,0.0390133336186409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,64,0,1,fp8,fp8,0,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,128,1,float16,float16,0,0.03991466760635376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,0,1,float16,float16,0,0.03945599993069967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,128,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,128,1,float16,fp8,0,0.04035199930270513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,0,1,float16,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,64,0,1,fp8,fp8,0,0.03809600075085958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,128,1,float16,float16,0,0.04081066697835922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,128,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,0,1,float16,float16,0,0.04130133241415024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,128,1,fp8,fp8,0,0.03941866755485535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,0,1,float16,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,64,0,1,fp8,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,128,1,float16,float16,0,0.02995733420054118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,0,1,float16,float16,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,128,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,128,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,64,0,1,fp8,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,128,1,float16,float16,0,0.029157333076000214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,0,1,float16,float16,0,0.028602667152881622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,128,1,float16,fp8,0,0.02852799991766612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,64,0,1,fp8,fp8,0,0.027077332139015198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,128,1,float16,float16,0,0.0276853342851003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,0,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,128,1,float16,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,0,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,64,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,128,1,float16,float16,0,0.029114666084448498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,128,1,float16,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,128,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,64,0,1,fp8,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,128,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,128,1,fp8,fp8,0,0.02201066662867864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,0,1,float16,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,64,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,128,1,fp8,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,128,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,128,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,64,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,64,0,1,fp8,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,128,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,128,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,128,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,128,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,128,1,float16,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,0,1,float16,float16,0,0.016629333297411602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,128,1,float16,float16,0,0.2805226643880208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,0,1,float16,float16,0,0.2812426686286926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,128,1,float16,fp8,0,0.2816320061683655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,128,1,fp8,fp8,0,0.2649173339207967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,0,1,fp8,fp8,0,0.26573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,64,0,1,float16,fp8,0,0.28061334292093915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,128,1,float16,float16,0,0.2835413416226705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,0,1,float16,float16,0,0.284496009349823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,128,1,float16,fp8,0,0.2825760046641032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,128,1,fp8,fp8,0,0.27081066370010376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,0,1,float16,fp8,0,0.281877338886261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,64,0,1,fp8,fp8,0,0.27261332670847577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,128,1,float16,float16,0,0.28570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,0,1,float16,float16,0,0.28570665915807086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,128,1,float16,fp8,0,0.2863573431968689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,128,1,fp8,fp8,0,0.2714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,128,1,float16,float16,0,0.16059199968973795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,0,1,float16,fp8,0,0.28549333413441974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,64,0,1,fp8,fp8,0,0.27324267228444415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,0,1,float16,float16,0,0.16099199652671814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,128,1,float16,fp8,0,0.15979199608167013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,128,1,fp8,fp8,0,0.15843199690183005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,0,1,float16,fp8,0,0.16005333264668783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,64,0,1,fp8,fp8,0,0.159061332543691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,128,1,float16,float16,0,0.14873600006103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,0,1,float16,float16,0,0.14812800288200378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,128,1,float16,fp8,0,0.14830933014551798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,128,1,fp8,fp8,0,0.14006400108337402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,0,1,float16,fp8,0,0.1471733351548513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,64,0,1,fp8,fp8,0,0.13983466227849325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,128,1,float16,float16,0,0.150325338045756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,0,1,float16,float16,0,0.14825066924095154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,128,1,float16,fp8,0,0.1488053301970164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,128,1,fp8,fp8,0,0.1420799990495046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,0,1,float16,fp8,0,0.14869333306948343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,64,0,1,fp8,fp8,0,0.14140799641609192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,128,1,float16,float16,0,0.1513920029004415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,0,1,float16,float16,0,0.15120533108711243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,128,1,float16,fp8,0,0.15054399768511453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,128,1,fp8,fp8,0,0.14548266927401224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,0,1,float16,fp8,0,0.15067733327547708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,128,1,float16,float16,0,0.08564266562461853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,64,0,1,fp8,fp8,0,0.1437333325544993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,0,1,float16,float16,0,0.08533866206804912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,128,1,float16,fp8,0,0.08475733796755473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,128,1,fp8,fp8,0,0.08731200297673543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,0,1,float16,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,64,0,1,fp8,fp8,0,0.08806932965914409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,128,1,float16,float16,0,0.08077333370844524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,0,1,float16,float16,0,0.08101866642634074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,128,1,float16,fp8,0,0.0804319977760315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,128,1,fp8,fp8,0,0.07611200213432312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,0,1,float16,fp8,0,0.08077866832415263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,64,0,1,fp8,fp8,0,0.07768533130486806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,128,1,float16,float16,0,0.08067733546098073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,0,1,float16,float16,0,0.08110400040944417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,128,1,float16,fp8,0,0.08179733157157898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,128,1,fp8,fp8,0,0.07634133100509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,0,1,float16,fp8,0,0.081386665503184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,64,0,1,fp8,fp8,0,0.07649066547552745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,128,1,float16,float16,0,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,0,1,float16,float16,0,0.0807360013326009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,128,1,float16,fp8,0,0.08069866895675659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,128,1,fp8,fp8,0,0.07677866518497467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,0,1,float16,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,64,0,1,fp8,fp8,0,0.07740800082683563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,128,1,float16,float16,0,0.04970133304595947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,128,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,128,1,fp8,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,0,1,float16,fp8,0,0.050000001986821495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,0,1,float16,float16,0,0.04949333270390829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,64,0,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,128,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,0,1,float16,float16,0,0.048112000028292336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,128,1,float16,fp8,0,0.04919999837875366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,128,1,fp8,fp8,0,0.046682665745417275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,0,1,float16,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,64,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,128,1,float16,fp8,0,0.04799999793370565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,128,1,fp8,fp8,0,0.0460746685663859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,128,1,float16,float16,0,0.049312000473340355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,0,1,float16,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,64,0,1,fp8,fp8,0,0.04613333443800608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,128,1,float16,float16,0,0.04794666667779287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,128,1,float16,fp8,0,0.049423997600873314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,0,1,float16,float16,0,0.04931733508904775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,128,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,0,1,float16,fp8,0,0.04773333172003428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,64,0,1,fp8,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,0,1,float16,float16,0,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,128,1,float16,fp8,0,0.03276800115903219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,128,1,float16,float16,0,0.0322026660044988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,128,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,64,0,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,128,1,float16,float16,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,128,1,float16,fp8,0,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,0,1,float16,float16,0,0.032287999987602234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,128,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,0,1,float16,fp8,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,64,0,1,fp8,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,128,1,float16,float16,0,0.03327466547489166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,0,1,float16,float16,0,0.032831999162832894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,128,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,128,1,fp8,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,64,0,1,fp8,fp8,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,0,1,float16,float16,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,128,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,128,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,128,1,fp8,fp8,0,0.031178665657838184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,0,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,64,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,128,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,128,1,float16,fp8,0,0.024512000381946564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,128,1,fp8,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,0,1,float16,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,128,1,float16,float16,0,0.02481599897146225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,64,0,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,128,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,0,1,float16,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,64,0,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,128,1,float16,float16,0,0.02492800106604894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,128,1,float16,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,128,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,64,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,128,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,0,1,float16,float16,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,128,1,float16,fp8,0,0.02497600018978119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,128,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,0,1,float16,fp8,0,0.024170666933059692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,64,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,128,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,128,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,128,1,float16,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,128,1,fp8,fp8,0,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,128,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,128,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,128,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,0,1,float16,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,0,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,128,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,64,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,128,1,float16,fp8,0,0.016506666938463848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,128,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,128,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,64,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,128,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,128,1,float16,fp8,0,0.016549333930015564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,128,1,float16,float16,0,0.22206934293111166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,0,1,float16,float16,0,0.22009066740671793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,128,1,fp8,fp8,0,0.2060799996058146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,0,1,float16,fp8,0,0.22025066614151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,128,1,float16,fp8,0,0.2211093306541443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,64,0,1,fp8,fp8,0,0.2063466707865397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,128,1,float16,float16,0,0.22038400173187256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,0,1,float16,float16,0,0.22262932856877646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,128,1,float16,fp8,0,0.2215893268585205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,128,1,fp8,fp8,0,0.20777066548665366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,0,1,float16,fp8,0,0.22091732422510782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,64,0,1,fp8,fp8,0,0.20856000979741415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,128,1,float16,float16,0,0.2211893399556478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,0,1,float16,float16,0,0.22217599550882974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,128,1,fp8,fp8,0,0.21057599782943726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,128,1,float16,fp8,0,0.22092799345652261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,0,1,float16,fp8,0,0.22228266795476279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,128,1,float16,float16,0,0.12131733695665996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,64,0,1,fp8,fp8,0,0.21126933892567953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,0,1,float16,float16,0,0.12147200107574463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,128,1,fp8,fp8,0,0.12160000205039978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,128,1,float16,fp8,0,0.12095466256141663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,0,1,fp8,fp8,0,0.12103999654452006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,64,0,1,float16,fp8,0,0.1216319998105367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,128,1,float16,float16,0,0.11682666341463725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,0,1,float16,float16,0,0.11559466520945232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,128,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,128,1,fp8,fp8,0,0.11008532842000325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,0,1,float16,fp8,0,0.1167093316713969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,64,0,1,fp8,fp8,0,0.10938666264216106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,128,1,float16,float16,0,0.11590933799743652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,0,1,float16,float16,0,0.11680533488591512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,128,1,float16,fp8,0,0.11735999584197998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,128,1,fp8,fp8,0,0.11001066366831462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,0,1,float16,fp8,0,0.11667733391125996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,64,0,1,fp8,fp8,0,0.1104693313439687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,0,1,float16,float16,0,0.11731732885042827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,128,1,float16,fp8,0,0.11758400003115337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,128,1,float16,float16,0,0.11713600158691406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,128,1,fp8,fp8,0,0.10936533411343892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,0,1,float16,fp8,0,0.117658664782842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,64,0,1,fp8,fp8,0,0.11037866274515788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,128,1,float16,float16,0,0.0661599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,0,1,float16,float16,0,0.06651733318964641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,128,1,float16,fp8,0,0.06682666639486949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,128,1,fp8,fp8,0,0.06437866886456807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,0,1,float16,fp8,0,0.06654933094978333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,64,0,1,fp8,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,128,1,float16,float16,0,0.06457066535949707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,0,1,float16,float16,0,0.06488533318042755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,128,1,float16,fp8,0,0.06528533498446147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,128,1,fp8,fp8,0,0.0627040018637975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,0,1,float16,fp8,0,0.06479999919732411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,64,0,1,fp8,fp8,0,0.06234133243560791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,128,1,float16,float16,0,0.06628799935181935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,0,1,float16,float16,0,0.06470933556556702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,128,1,fp8,fp8,0,0.06239999830722809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,128,1,float16,fp8,0,0.06459199885527293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,0,1,float16,fp8,0,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,64,0,1,fp8,fp8,0,0.0621919979651769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,128,1,float16,float16,0,0.06436799963315327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,128,1,float16,fp8,0,0.06484800080458324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,0,1,float16,float16,0,0.064560001095136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,128,1,fp8,fp8,0,0.06275733311971028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,0,1,float16,fp8,0,0.06407466530799866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,64,0,1,fp8,fp8,0,0.06232533355553945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,128,1,float16,float16,0,0.04041066765785217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,0,1,float16,float16,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,128,1,float16,fp8,0,0.04146133363246918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,128,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,0,1,float16,fp8,0,0.042026668787002563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,64,0,1,fp8,fp8,0,0.04038933416207632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,128,1,float16,float16,0,0.04051200052102407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,0,1,float16,float16,0,0.04110399881998698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,128,1,fp8,fp8,0,0.03942933430274328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,128,1,float16,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,0,1,float16,fp8,0,0.04061333338419596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,64,0,1,fp8,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,128,1,float16,float16,0,0.0401653324564298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,128,1,float16,fp8,0,0.04223466912905375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,0,1,float16,float16,0,0.040192000567913055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,128,1,fp8,fp8,0,0.039333333571751915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,0,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,64,0,1,fp8,fp8,0,0.03813866774241129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,128,1,float16,float16,0,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,128,1,float16,fp8,0,0.040048000713189445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,128,1,fp8,fp8,0,0.038762666285037994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,0,1,fp8,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,64,0,1,float16,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,128,1,float16,float16,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,128,1,float16,fp8,0,0.028175999720891316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,64,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,128,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,128,1,float16,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,128,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,0,1,float16,float16,0,0.02863466739654541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,0,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,64,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,128,1,float16,float16,0,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,0,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,128,1,fp8,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,0,1,float16,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,64,0,1,fp8,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,128,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,0,1,float16,float16,0,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,128,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,0,1,float16,fp8,0,0.029146666328112285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,128,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,64,0,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,0,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,128,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,128,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,0,1,float16,fp8,0,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,128,1,float16,float16,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,0,1,float16,float16,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,128,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,128,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,128,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,128,1,fp8,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,0,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,64,0,1,fp8,fp8,0,0.02090666691462199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,128,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,0,1,float16,fp8,0,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,64,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,128,1,float16,float16,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,0,1,float16,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,64,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,128,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,0,1,float16,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,128,1,float16,float16,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,128,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,64,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,0,1,float16,float16,0,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,128,1,fp8,fp8,0,0.016336000214020412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,128,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,0,1,float16,float16,0,0.1916266679763794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,128,1,float16,fp8,0,0.19137599070866904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,128,1,float16,float16,0,0.19286400079727173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,128,1,fp8,fp8,0,0.1772480010986328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,0,1,float16,fp8,0,0.19083199898401895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,128,1,float16,float16,0,0.19146132469177246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,64,0,1,fp8,fp8,0,0.17831466595331827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,0,1,float16,float16,0,0.19132800896962485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,128,1,float16,fp8,0,0.1917440096537272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,0,1,float16,fp8,0,0.19139200448989868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,128,1,fp8,fp8,0,0.17795199155807495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,64,0,1,fp8,fp8,0,0.17923200130462646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,128,1,float16,float16,0,0.19128000736236572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,0,1,float16,float16,0,0.19222400585810342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,128,1,float16,fp8,0,0.19147199392318726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,128,1,fp8,fp8,0,0.17934934298197427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,0,1,float16,fp8,0,0.19211200873057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,128,1,float16,float16,0,0.10403199990590413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,64,0,1,fp8,fp8,0,0.17812800407409668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,0,1,float16,float16,0,0.10342400272687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,128,1,float16,fp8,0,0.10401599605878194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,128,1,fp8,fp8,0,0.09724266330401103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,0,1,float16,fp8,0,0.10326932867368062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,128,1,float16,float16,0,0.10103467106819153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,64,0,1,fp8,fp8,0,0.09922666351000468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,0,1,float16,float16,0,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,128,1,float16,fp8,0,0.1032373309135437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,128,1,fp8,fp8,0,0.09751466910044353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,0,1,float16,fp8,0,0.10281067093213399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,64,0,1,fp8,fp8,0,0.09715732932090759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,128,1,float16,float16,0,0.10364266236623128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,0,1,float16,float16,0,0.10314133763313293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,128,1,float16,fp8,0,0.10131733616193135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,0,1,float16,fp8,0,0.10331733028093974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,128,1,fp8,fp8,0,0.09558399518330891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,64,0,1,fp8,fp8,0,0.09585600097974141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,128,1,float16,float16,0,0.10199466347694397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,0,1,float16,float16,0,0.10154666503270467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,128,1,float16,fp8,0,0.10331733028093974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,128,1,fp8,fp8,0,0.09693333506584167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,128,1,float16,float16,0,0.06135466694831848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,0,1,fp8,fp8,0,0.09650133053461711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,64,0,1,float16,fp8,0,0.10363733768463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,0,1,float16,float16,0,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,128,1,float16,fp8,0,0.06035199761390686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,128,1,fp8,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,0,1,fp8,fp8,0,0.056346664826075234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,128,1,float16,float16,0,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,64,0,1,float16,fp8,0,0.060640002290407814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,0,1,float16,float16,0,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,128,1,fp8,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,128,1,float16,fp8,0,0.057914664347966514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,0,1,float16,fp8,0,0.059631998340288796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,64,0,1,fp8,fp8,0,0.056458666920661926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,128,1,float16,float16,0,0.0580213318268458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,0,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,128,1,float16,fp8,0,0.058415999015172325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,128,1,fp8,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,0,1,float16,fp8,0,0.05783999959627787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,64,0,1,fp8,fp8,0,0.054655998945236206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,128,1,float16,float16,0,0.05864533285299937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,128,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,0,1,float16,float16,0,0.0584746648867925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,128,1,fp8,fp8,0,0.05592533449331919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,0,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,64,0,1,float16,fp8,0,0.058431997895240784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,128,1,float16,float16,0,0.038106667498747505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,128,1,float16,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,128,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,0,1,float16,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,64,0,1,fp8,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,128,1,float16,float16,0,0.03734400123357773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,0,1,float16,float16,0,0.03733866661787033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,128,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,128,1,fp8,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,0,1,float16,fp8,0,0.03737066686153412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,64,0,1,fp8,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,128,1,float16,float16,0,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,0,1,float16,float16,0,0.038176000118255615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,128,1,float16,fp8,0,0.03793599953254064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,128,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,64,0,1,fp8,fp8,0,0.03633599976698557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,128,1,float16,float16,0,0.03733866661787033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,0,1,float16,float16,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,0,1,float16,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,0,1,fp8,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,64,128,1,fp8,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,0,1,float16,float16,0,0.027109332382678986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,128,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,64,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,128,1,float16,fp8,0,0.026560001075267792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,0,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,64,0,1,fp8,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,128,1,float16,float16,0,0.025797332326571148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,0,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,0,1,float16,fp8,0,0.026176000634829204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,128,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,0,1,float16,float16,0,0.02647999922434489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,128,1,float16,fp8,0,0.02593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,128,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,128,1,float16,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,0,1,float16,float16,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,128,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,128,1,float16,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,0,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,128,1,float16,float16,0,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,128,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,0,1,float16,fp8,0,0.01977066695690155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,64,0,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,128,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,0,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,128,1,float16,fp8,0,0.02075200031201045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,0,1,float16,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,128,1,float16,fp8,0,0.01874133323629697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,128,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,128,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,64,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,128,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,128,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,0,1,float16,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,64,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,128,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,128,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,128,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,128,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,128,1,fp8,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,128,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,128,1,fp8,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,0,1,float16,float16,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,128,1,fp8,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,0,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,128,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,64,0,1,fp8,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,64,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,0,1,float16,float16,0,0.16568000117937723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,128,1,float16,float16,0,0.1662773291269938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,128,1,float16,fp8,0,0.16477866967519125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,0,1,fp8,fp8,0,0.150325338045756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,128,1,fp8,fp8,0,0.1508799990018209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,64,0,1,float16,fp8,0,0.1672746737798055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,128,1,float16,float16,0,0.16686934232711792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,0,1,float16,float16,0,0.16520532965660095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,128,1,float16,fp8,0,0.1673226753870646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,128,1,fp8,fp8,0,0.15201066931088766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,0,1,float16,fp8,0,0.16530133287111917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,64,0,1,fp8,fp8,0,0.1527839998404185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,128,1,float16,float16,0,0.1651893357435862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,0,1,float16,float16,0,0.16511999567349753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,128,1,float16,fp8,0,0.16747732957204184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,128,1,fp8,fp8,0,0.15058666467666626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,0,1,fp8,fp8,0,0.15129066507021585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,64,0,1,float16,fp8,0,0.16592533389727274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,128,1,float16,float16,0,0.09081600109736125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,0,0.08947199583053589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,128,1,float16,fp8,0,0.09117866555849712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,128,1,fp8,fp8,0,0.08310399949550629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,0,0.08931199709574382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,64,0,1,fp8,fp8,0,0.0814879983663559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,128,1,float16,float16,0,0.08946133653322856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,0,1,float16,float16,0,0.08933867017428081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,128,1,float16,fp8,0,0.08918399612108867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,128,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,0,1,float16,fp8,0,0.09084266424179077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,128,1,float16,float16,0,0.08864532907803853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,64,0,1,fp8,fp8,0,0.08125333487987518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,0,1,float16,float16,0,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,128,1,float16,fp8,0,0.08893332878748576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,128,1,fp8,fp8,0,0.08124800026416779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,0,1,float16,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,64,0,1,fp8,fp8,0,0.08189866443475087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,128,1,float16,float16,0,0.08924266695976257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,0,1,float16,float16,0,0.09122133255004883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,128,1,float16,fp8,0,0.08907199899355571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,128,1,fp8,fp8,0,0.082805335521698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,128,1,float16,float16,0,0.051776001850763954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,0,1,fp8,fp8,0,0.08264000217119853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,64,0,1,float16,fp8,0,0.09012800455093384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,0,0.05247466762860616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,128,1,fp8,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,0,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,128,1,float16,float16,0,0.05203199883302053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,64,0,1,fp8,fp8,0,0.04809600114822388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,0,1,float16,float16,0,0.05220800141493479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,128,1,float16,fp8,0,0.052298665046691895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,128,1,fp8,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,0,1,float16,fp8,0,0.05231466889381409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,64,0,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,0,1,float16,float16,0,0.0518453319867452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,128,1,float16,float16,0,0.05231999854246775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,128,1,fp8,fp8,0,0.04821866750717163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,128,1,float16,fp8,0,0.05261866748332977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,0,1,float16,fp8,0,0.05169066786766052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,64,0,1,fp8,fp8,0,0.04849066833655039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,128,1,float16,float16,0,0.0518506666024526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,0,1,float16,float16,0,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,128,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,0,1,float16,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,128,1,fp8,fp8,0,0.048394665122032166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,64,0,1,fp8,fp8,0,0.04840533435344696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,128,1,float16,float16,0,0.033941333492596946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,0,0.033589333295822144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,128,1,float16,fp8,0,0.033786666889985405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,128,1,fp8,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,64,0,1,fp8,fp8,0,0.03257066756486893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,128,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,0,1,float16,float16,0,0.03330666571855545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,128,1,float16,fp8,0,0.034128000338872276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,128,1,fp8,fp8,0,0.03192000091075897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,0,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,64,0,1,float16,fp8,0,0.034602666894594826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,128,1,float16,float16,0,0.03395200024048487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,128,1,float16,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,128,1,fp8,fp8,0,0.03205333401759466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,0,1,float16,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,64,0,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,128,1,float16,float16,0,0.034101332227389015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,0,1,float16,float16,0,0.033930666744709015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,128,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,128,1,fp8,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,0,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,64,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,128,1,float16,float16,0,0.024501333634058636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,128,1,float16,fp8,0,0.02587733417749405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,128,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,64,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,64,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,128,1,float16,float16,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,0,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,128,1,fp8,fp8,0,0.023743999501069386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,64,0,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,0,1,float16,float16,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,128,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,0,1,float16,fp8,0,0.02443733314673106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,64,0,1,fp8,fp8,0,0.023669332265853882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,0,1,float16,float16,0,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,128,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,128,1,float16,float16,0,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,128,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,128,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,64,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,0,1,float16,float16,0,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,128,1,float16,float16,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,128,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,64,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,128,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,64,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,128,1,float16,float16,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,0,1,float16,float16,0,0.017903999735911686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,64,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,128,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,128,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,64,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,128,1,float16,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,128,1,float16,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,64,0,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,128,1,fp8,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,128,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,64,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,128,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,128,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,128,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,float16,0,1.5952746073404949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,128,1,float16,fp8,0,1.6062134106953938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,128,1,fp8,fp8,0,1.499824047088623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,float16,0,1.6117706298828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,float16,0,8.489199956258139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,128,1,float16,fp8,0,1.6266239484151204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,0,1,float16,fp8,0,8.501967748006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,64,0,1,fp8,fp8,0,7.324405034383138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,128,1,fp8,fp8,0,1.5225920677185059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,float16,0,1.6282240549723308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,float16,0,8.520575841267904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,128,1,float16,fp8,0,1.6413280169169109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,0,1,fp8,fp8,0,7.351296106974284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,64,0,1,float16,fp8,0,8.51748275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,128,1,fp8,fp8,0,1.5420212745666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,float16,0,1.6556426684061687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,float16,0,8.525349299112955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,128,1,float16,fp8,0,1.6751947402954102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,0,1,fp8,fp8,0,7.385557174682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,128,1,fp8,fp8,0,1.5796160697937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,64,0,1,float16,fp8,0,8.538922627766928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,float16,0,0.9493920008341471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,float16,0,8.598175684611002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,128,1,float16,fp8,0,0.9734186331431071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,128,1,fp8,fp8,0,0.9285919666290283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,float16,0,4.474783897399902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,0,1,fp8,fp8,0,7.419125239054362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,64,0,1,float16,fp8,0,8.594517389933268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,float16,0,0.8376320203145345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,128,1,float16,fp8,0,0.8468960126241049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,0,1,fp8,fp8,0,3.888847986857096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,64,0,1,float16,fp8,0,4.505589485168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,128,1,fp8,fp8,0,0.7900640169779459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,float16,0,0.8409279982248942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,float16,0,4.329557418823242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,128,1,float16,fp8,0,0.8498559792836508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,0,1,fp8,fp8,0,3.7470827102661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,64,0,1,float16,fp8,0,4.33786137898763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,128,1,fp8,fp8,0,0.7973492940266927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,float16,0,0.8493226369222006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,float16,0,4.3300479253133135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,128,1,float16,fp8,0,0.8573280175526937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,0,1,fp8,fp8,0,3.7473812103271484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,128,1,fp8,fp8,0,0.8053653240203857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,64,0,1,float16,fp8,0,4.348410606384277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,float16,0,0.8613226413726807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,float16,0,4.336533228556315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,128,1,float16,fp8,0,0.8745439847310384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,0,1,fp8,fp8,0,3.779989242553711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,64,0,1,float16,fp8,0,4.354319890340169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,128,1,fp8,fp8,0,0.8238773345947266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,float16,0,0.5280213356018066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,float16,0,4.378986676534017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,128,1,float16,fp8,0,0.5373493432998657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,128,1,fp8,fp8,0,0.5203786691029867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,float16,0,2.3295626640319824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,0,1,fp8,fp8,0,3.7746718724568686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,64,0,1,float16,fp8,0,4.37996260325114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,float16,0,0.47387198607126874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,128,1,float16,fp8,0,0.47634132703145343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,0,1,float16,fp8,0,2.3482774098714194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,64,0,1,fp8,fp8,0,2.041360060373942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,128,1,fp8,fp8,0,0.4502986669540405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,float16,0,2.2753705978393555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,float16,0,0.47573331991831463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,128,1,float16,fp8,0,0.48024535179138184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,0,1,fp8,fp8,0,1.9671680132548015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,64,0,1,float16,fp8,0,2.260202725728353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,128,1,fp8,fp8,0,0.45395731925964355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,float16,0,2.2772693634033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,float16,0,0.4782400131225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,0,1,fp8,fp8,0,1.9709280331929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,64,0,1,float16,fp8,0,2.2623146375020347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,128,1,float16,fp8,0,0.4840533336003621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,128,1,fp8,fp8,0,0.45767998695373535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,float16,0,2.261829376220703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,float16,0,0.48446933428446454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,0,1,fp8,fp8,0,1.9750933647155762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,64,0,1,float16,fp8,0,2.272746721903483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,128,1,float16,fp8,0,0.49262932936350506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,128,1,fp8,fp8,0,0.4663146734237671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,float16,0,2.2734293937683105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,float16,0,0.3717600107192993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,0,1,fp8,fp8,0,1.9832159678141277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,64,0,1,float16,fp8,0,2.281829357147217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,128,1,float16,fp8,0,0.3728373448053996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,128,1,fp8,fp8,0,0.35500800609588623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,float16,0,1.3192373116811116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,float16,0,0.37001601854960126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,0,1,fp8,fp8,0,1.153061310450236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,64,0,1,float16,fp8,0,1.318832000096639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,128,1,float16,fp8,0,0.3698506752649943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,float16,0,1.3003573417663574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,128,1,fp8,fp8,0,0.3544693390528361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,float16,0,0.36790398756663006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,0,1,float16,fp8,0,1.3024746576944988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,64,0,1,fp8,fp8,0,1.147871971130371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,128,1,float16,fp8,0,0.368064006169637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,float16,0,1.3039360046386719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,128,1,fp8,fp8,0,0.35334400335947674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,float16,0,0.37166400750478107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,0,1,float16,fp8,0,1.3023680051167805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,64,0,1,fp8,fp8,0,1.145626703898112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,128,1,float16,fp8,0,0.37226665019989014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,128,1,fp8,fp8,0,0.3537493149439494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,float16,0,1.3050560156504314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,float16,0,0.371616005897522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,0,1,float16,fp8,0,1.301199992497762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,64,0,1,fp8,fp8,0,1.1498719851175945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,128,1,float16,fp8,0,0.3715200026830037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,float16,0,1.3077706495920818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,128,1,fp8,fp8,0,0.35303465525309247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,0,1,float16,fp8,0,1.3063146273295085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,64,0,1,fp8,fp8,0,1.1482933362325032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,float16,0,1.1905759970347087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,128,1,fp8,fp8,0,1.114240010579427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,128,1,float16,fp8,0,1.2011146545410156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,float16,0,1.197760025660197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,float16,0,5.039461453755696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,0,1,float16,fp8,0,5.082031885782878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,128,1,float16,fp8,0,1.2084266344706218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,64,0,1,fp8,fp8,0,4.376650810241699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,128,1,fp8,fp8,0,1.1303520202636719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,float16,0,5.064298629760742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,float16,0,1.207909345626831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,128,1,float16,fp8,0,1.220911979675293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,0,1,fp8,fp8,0,4.39243729909261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,64,0,1,float16,fp8,0,5.069237391153972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,128,1,fp8,fp8,0,1.1440906524658203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,float16,0,1.2287627061208088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,float16,0,5.07582410176595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,128,1,float16,fp8,0,1.2427039941151936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,0,1,fp8,fp8,0,4.398058573404948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,64,0,1,float16,fp8,0,5.093503952026367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,128,1,fp8,fp8,0,1.171445369720459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,float16,0,0.7153493563334147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,float16,0,5.09878412882487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,128,1,float16,fp8,0,0.7339573701222738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,0,1,fp8,fp8,0,4.438085238138835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,float16,0,2.702773412068685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,128,1,fp8,fp8,0,0.7011626561482748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,64,0,1,float16,fp8,0,5.119610786437988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,float16,0,0.6323306560516357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,128,1,float16,fp8,0,0.6387519836425781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,0,1,fp8,fp8,0,2.366096019744873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,64,0,1,float16,fp8,0,2.7205918629964194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,float16,0,2.5944159825642905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,128,1,fp8,fp8,0,0.5971413453420004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,float16,0,0.635258674621582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,128,1,float16,fp8,0,0.6416053374608358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,0,1,float16,fp8,0,2.5932586987813315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,64,0,1,fp8,fp8,0,2.2569333712259927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,128,1,fp8,fp8,0,0.6024800141652426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,float16,0,2.5982133547465005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,float16,0,0.6400320132573446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,128,1,float16,fp8,0,0.6477386554082235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,0,1,fp8,fp8,0,2.2621919314066568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,64,0,1,float16,fp8,0,2.6022987365722656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,128,1,fp8,fp8,0,0.6093279918034872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,float16,0,2.6006080309549966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,float16,0,0.650821328163147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,0,1,fp8,fp8,0,2.270122687021891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,128,1,float16,fp8,0,0.659711996714274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,64,0,1,float16,fp8,0,2.6128692626953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,128,1,fp8,fp8,0,0.6218560139338175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,float16,0,2.616410732269287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,float16,0,0.3999040126800537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,128,1,float16,fp8,0,0.4110506772994995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,0,1,fp8,fp8,0,2.282896041870117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,float16,0,1.423397382100423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,64,0,1,float16,fp8,0,2.623162587483724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,128,1,fp8,fp8,0,0.39628799756368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,float16,0,0.35944000879923504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,0,1,float16,fp8,0,1.4363999366760254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,64,0,1,fp8,fp8,0,1.25382399559021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,128,1,float16,fp8,0,0.363264004389445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,128,1,fp8,fp8,0,0.3431893189748128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,float16,0,1.3676212628682454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,float16,0,0.36180798212687176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,0,1,fp8,fp8,0,1.1990826924641926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,64,0,1,float16,fp8,0,1.3719946543375652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,128,1,float16,fp8,0,0.3656373421351115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,128,1,fp8,fp8,0,0.34747199217478436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,float16,0,1.3709759712219238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,float16,0,0.36562132835388184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,0,1,fp8,fp8,0,1.2039946715037029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,64,0,1,float16,fp8,0,1.3734505971272786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,128,1,float16,fp8,0,0.3680373430252075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,128,1,fp8,fp8,0,0.34907201925913495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,float16,0,1.3753066062927246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,float16,0,0.3694933255513509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,0,1,fp8,fp8,0,1.2036426862080891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,64,0,1,float16,fp8,0,1.3795199394226074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,128,1,float16,fp8,0,0.37588798999786377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,128,1,fp8,fp8,0,0.3564320007960002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,float16,0,1.3821279207865398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,float16,0,0.2833706736564636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,0,1,float16,fp8,0,1.3889759381612141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,128,1,float16,fp8,0,0.28351465861002606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,64,0,1,fp8,fp8,0,1.2122613588968914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,float16,0,0.827023983001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,128,1,fp8,fp8,0,0.2711626688639323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,float16,0,0.28146133820215863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,0,1,float16,fp8,0,0.827023983001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,64,0,1,fp8,fp8,0,0.7297013600667318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,float16,0,0.8166613578796387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,128,1,float16,fp8,0,0.2834666570027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,128,1,fp8,fp8,0,0.2692906657854716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,float16,0,0.28361066182454425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,0,1,float16,fp8,0,0.8166560331980387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,64,0,1,fp8,fp8,0,0.7238986492156982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,128,1,float16,fp8,0,0.2834879954655965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,float16,0,0.8167893091837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,128,1,fp8,fp8,0,0.27153066794077557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,float16,0,0.2813280026117961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,0,1,float16,fp8,0,0.8167893091837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,64,0,1,fp8,fp8,0,0.7218186855316162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,128,1,float16,fp8,0,0.28352532784144086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,128,1,fp8,fp8,0,0.2708746592203776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,float16,0,0.8184320131937662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,float16,0,0.2829226652781169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,0,1,float16,fp8,0,0.8166293303171793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,64,0,1,fp8,fp8,0,0.7253386974334717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,128,1,fp8,fp8,0,0.2710240085919698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,128,1,float16,fp8,0,0.2831839919090271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,float16,0,0.8190933068593343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,0,1,float16,fp8,0,0.8186986446380615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,64,0,1,fp8,fp8,0,0.7257493336995443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,float16,0,0.9899466832478842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,128,1,float16,fp8,0,0.9997653166453043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,128,1,fp8,fp8,0,0.9297920068105062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,float16,0,0.9955786863962809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,float16,0,3.6564693450927734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,0,1,float16,fp8,0,3.672074635823568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,128,1,float16,fp8,0,1.0068106651306152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,64,0,1,fp8,fp8,0,3.1734612782796225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,128,1,fp8,fp8,0,0.9391893545786539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,float16,0,3.6605494817097983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,float16,0,1.0059786637624104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,0,1,fp8,fp8,0,3.1828587849934897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,128,1,float16,fp8,0,1.0160426298777263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,64,0,1,float16,fp8,0,3.668719927469889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,128,1,fp8,fp8,0,0.9487520058949789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,float16,0,3.6754401524861655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,float16,0,1.0213546752929688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,0,1,fp8,fp8,0,3.2007147471110025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,128,1,float16,fp8,0,1.034058650334676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,64,0,1,float16,fp8,0,3.688997268676758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,128,1,fp8,fp8,0,0.9725120067596436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,float16,0,0.5984799861907959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,float16,0,3.6940959294637046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,128,1,float16,fp8,0,0.6124106645584106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,0,1,fp8,fp8,0,3.225520133972168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,64,0,1,float16,fp8,0,3.707317352294922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,128,1,fp8,fp8,0,0.5859786669413248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,float16,0,1.9799359639485676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,float16,0,0.5290986696879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,128,1,float16,fp8,0,0.5334719816843668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,0,1,float16,fp8,0,1.9951519966125488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,64,0,1,fp8,fp8,0,1.7404106458028157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,128,1,fp8,fp8,0,0.5003840128580729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,float16,0,1.8840160369873047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,float16,0,0.5305386781692505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,0,1,fp8,fp8,0,1.6466827392578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,64,0,1,float16,fp8,0,1.8924214045206706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,128,1,float16,fp8,0,0.536736011505127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,128,1,fp8,fp8,0,0.5053973197937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,float16,0,1.8887359301249187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,float16,0,0.5357973178227743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,0,1,fp8,fp8,0,1.6527466773986816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,64,0,1,float16,fp8,0,1.897066593170166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,128,1,float16,fp8,0,0.5412960052490234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,float16,0,1.8996319770812988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,128,1,fp8,fp8,0,0.5100320180257162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,float16,0,0.5440906683603922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,0,1,float16,fp8,0,1.8998239835103352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,64,0,1,fp8,fp8,0,1.660213311513265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,128,1,float16,fp8,0,0.5517386595408121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,128,1,fp8,fp8,0,0.5213226477305094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,float16,0,1.9092532793680828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,float16,0,0.3354666630427043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,0,1,float16,fp8,0,1.914101282755534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,64,0,1,fp8,fp8,0,1.6713013648986816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,float16,0,1.0527679920196533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,128,1,float16,fp8,0,0.34457600116729736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,128,1,fp8,fp8,0,0.33212800820668537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,float16,0,0.2983413338661194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,0,1,float16,fp8,0,1.0620266596476238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,64,0,1,fp8,fp8,0,0.9335306485493978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,128,1,fp8,fp8,0,0.2878719965616862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,128,1,float16,fp8,0,0.30221333106358844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,float16,0,1.00382399559021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,float16,0,0.3014346758524577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,0,1,fp8,fp8,0,0.8835946718851725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,128,1,float16,fp8,0,0.3040800094604492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,64,0,1,float16,fp8,0,1.0085066954294841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,128,1,fp8,fp8,0,0.29072533051172894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,float16,0,1.0068000157674153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,float16,0,0.3041973312695821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,0,1,fp8,fp8,0,0.8912959893544515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,64,0,1,float16,fp8,0,1.0084266662597656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,128,1,float16,fp8,0,0.3083146611849467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,128,1,fp8,fp8,0,0.2929439942042033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,float16,0,1.011631965637207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,float16,0,0.3086880048116048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,0,1,float16,fp8,0,1.0146719614664714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,64,0,1,fp8,fp8,0,0.8934079806009928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,128,1,float16,fp8,0,0.3141706585884094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,128,1,fp8,fp8,0,0.2999946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,float16,0,1.0188319683074951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,float16,0,0.2398080031077067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,128,1,float16,fp8,0,0.23972799380620322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,float16,0,0.6240106821060181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,0,1,float16,fp8,0,1.0202240149180095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,64,0,1,fp8,fp8,0,0.8986079692840576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,128,1,fp8,fp8,0,0.22839999198913574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,float16,0,0.23667200406392416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,0,1,fp8,fp8,0,0.5506026744842529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,64,0,1,float16,fp8,0,0.6236213445663452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,128,1,float16,fp8,0,0.23710399866104126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,128,1,fp8,fp8,0,0.22631466388702393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,float16,0,0.6110186576843262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,0,1,float16,fp8,0,0.6136800050735474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,float16,0,0.23570666710535684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,64,0,1,fp8,fp8,0,0.5455946524937948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,float16,0,0.6138613224029541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,128,1,float16,fp8,0,0.23831466833750406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,128,1,fp8,fp8,0,0.2266826629638672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,0,1,float16,fp8,0,0.6137813329696655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,float16,0,0.23869866132736206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,64,0,1,fp8,fp8,0,0.5455466508865356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,float16,0,0.6157493193944296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,128,1,float16,fp8,0,0.23651200532913208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,128,1,fp8,fp8,0,0.22809600830078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,float16,0,0.23761065800984701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,0,1,float16,fp8,0,0.6141173442204794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,64,0,1,fp8,fp8,0,0.5450186729431152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,128,1,float16,fp8,0,0.23873066902160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,float16,0,0.6157600084940592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,128,1,fp8,fp8,0,0.228383998076121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,0,1,float16,fp8,0,0.6146719853083292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,64,0,1,fp8,fp8,0,0.5498186747233073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,float16,0,1.5471359888712566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,128,1,float16,fp8,0,1.5592586199442546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,128,1,fp8,fp8,0,1.449072043100993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,float16,0,1.5681974093119304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,float16,0,4.887616157531738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,0,1,fp8,fp8,0,4.252453486124675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,64,0,1,float16,fp8,0,4.894240061442058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,128,1,float16,fp8,0,1.5807894070943196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,128,1,fp8,fp8,0,1.4757173856099446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,float16,0,4.909616152445476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,float16,0,1.5818986892700195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,0,1,fp8,fp8,0,4.288293202718099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,64,0,1,float16,fp8,0,4.92087999979655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,128,1,float16,fp8,0,1.5970773696899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,128,1,fp8,fp8,0,1.4951680501302083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,float16,0,4.9309492111206055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,float16,0,1.6139626502990723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,0,1,fp8,fp8,0,4.30131721496582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,128,1,float16,fp8,0,1.6292533874511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,64,0,1,float16,fp8,0,4.944042523701985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,128,1,fp8,fp8,0,1.5332372983296711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,float16,0,4.9767147699991865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,float16,0,0.9056800206502279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,128,1,float16,fp8,0,0.9281866550445557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,0,1,fp8,fp8,0,4.347632090250651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,128,1,fp8,fp8,0,0.883674701054891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,64,0,1,float16,fp8,0,4.995173454284668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,float16,0,2.634394645690918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,float16,0,0.7925333182017008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,0,1,float16,fp8,0,2.6516480445861816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,64,0,1,fp8,fp8,0,2.3216373125712075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,128,1,float16,fp8,0,0.7996266682942709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,128,1,fp8,fp8,0,0.7466346422831217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,float16,0,2.484917322794596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,float16,0,0.7983946800231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,0,1,fp8,fp8,0,2.1659199396769204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,64,0,1,float16,fp8,0,2.487941265106201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,128,1,float16,fp8,0,0.8054773012797037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,128,1,fp8,fp8,0,0.7529653708140055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,float16,0,2.491663932800293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,float16,0,0.8055360317230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,0,1,fp8,fp8,0,2.1768266359965005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,128,1,float16,fp8,0,0.8147892951965332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,64,0,1,float16,fp8,0,2.4949013392130532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,float16,0,2.5025973320007324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,128,1,fp8,fp8,0,0.7618827025095621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,float16,0,0.8176159858703613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,0,1,fp8,fp8,0,2.1820640563964844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,64,0,1,float16,fp8,0,2.509653409322103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,128,1,float16,fp8,0,0.8295466899871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,128,1,fp8,fp8,0,0.7790239651997884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,float16,0,2.5188533465067544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,float16,0,0.48179201285044354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,128,1,float16,fp8,0,0.49324798583984375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,0,1,fp8,fp8,0,2.2051146825154624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,float16,0,1.3618346850077312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,64,0,1,float16,fp8,0,2.5315252939860025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,128,1,fp8,fp8,0,0.47206934293111164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,float16,0,0.4245706796646118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,0,1,float16,fp8,0,1.376757303873698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,128,1,float16,fp8,0,0.4289226531982422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,64,0,1,fp8,fp8,0,1.210805336634318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,float16,0,1.2882239818572998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,128,1,fp8,fp8,0,0.4047520160675049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,float16,0,0.42789868513743085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,0,1,fp8,fp8,0,1.1352852980295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,64,0,1,float16,fp8,0,1.2947093645731609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,128,1,float16,fp8,0,0.4317599932352702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,float16,0,1.2947146892547607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,128,1,fp8,fp8,0,0.408735990524292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,float16,0,0.4320533275604248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,0,1,float16,fp8,0,1.2987253665924072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,64,0,1,fp8,fp8,0,1.1379786332448323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,128,1,float16,fp8,0,0.43722132841746014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,float16,0,1.3012746969858806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,128,1,fp8,fp8,0,0.4113226731618245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,float16,0,0.4394559860229492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,0,1,float16,fp8,0,1.3036213715871174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,64,0,1,fp8,fp8,0,1.1427733103434246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,128,1,float16,fp8,0,0.44496532281239826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,float16,0,1.3091253439585369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,128,1,fp8,fp8,0,0.41994134585062665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,float16,0,0.27242666482925415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,128,1,float16,fp8,0,0.27986133098602295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,float16,0,0.7335626284281412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,0,1,float16,fp8,0,1.3164479732513428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,64,0,1,fp8,fp8,0,1.1514879862467449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,128,1,fp8,fp8,0,0.2712213397026062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,float16,0,0.24101332823435465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,0,1,fp8,fp8,0,0.659114678700765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,64,0,1,float16,fp8,0,0.7405333518981934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,128,1,float16,fp8,0,0.24334933360417685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,float16,0,0.6932693322499593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,128,1,fp8,fp8,0,0.23457600673039755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,float16,0,0.2421600023905436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,0,1,float16,fp8,0,0.6949706872304281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,64,0,1,fp8,fp8,0,0.617797334988912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,float16,0,0.6955733299255371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,128,1,float16,fp8,0,0.24487467606862387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,128,1,fp8,fp8,0,0.2363040049870809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,float16,0,0.24643200635910034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,0,1,float16,fp8,0,0.6975039641062418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,64,0,1,fp8,fp8,0,0.6177759965260824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,float16,0,0.6990880171457926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,128,1,float16,fp8,0,0.2487893303235372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,128,1,fp8,fp8,0,0.23868266741434732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,float16,0,0.25171200434366864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,0,1,float16,fp8,0,0.7016746997833252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,64,0,1,fp8,fp8,0,0.6214986642201742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,128,1,float16,fp8,0,0.25487999121348065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,float16,0,0.7063626448313395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,128,1,fp8,fp8,0,0.24304000536600748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,float16,0,0.19593600432078043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,0,1,float16,fp8,0,0.7088906764984131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,64,0,1,fp8,fp8,0,0.6270666519800822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,float16,0,0.44741864999135333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,128,1,float16,fp8,0,0.19571733474731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,128,1,fp8,fp8,0,0.1851253310839335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,float16,0,0.18971733252207437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,0,1,float16,fp8,0,0.4456746578216553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,64,0,1,fp8,fp8,0,0.3978453477223714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,float16,0,0.4367946783701579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,128,1,fp8,fp8,0,0.18318933248519897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,128,1,float16,fp8,0,0.18978132804234824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,0,1,float16,fp8,0,0.4392640193303426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,float16,0,0.19121599197387695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,64,0,1,fp8,fp8,0,0.39208531379699707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,float16,0,0.43830398718516034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,128,1,float16,fp8,0,0.1905226707458496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,128,1,fp8,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,0,1,float16,fp8,0,0.4386560122172038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,float16,0,0.18965866168340048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,64,0,1,fp8,fp8,0,0.39020800590515137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,128,1,float16,fp8,0,0.18940265973409018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,128,1,fp8,fp8,0,0.18320000171661377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,float16,0,0.4374133348464966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,float16,0,0.18967467546463013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,0,1,float16,fp8,0,0.43864532311757404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,64,0,1,fp8,fp8,0,0.39109333356221515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,128,1,float16,fp8,0,0.19197867314020792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,128,1,fp8,fp8,0,0.18429332971572876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,float16,0,0.43853334585825604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,0,1,fp8,fp8,0,0.3943146864573161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,64,0,1,float16,fp8,0,0.4413226842880249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,float16,0,1.1545173327128093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,128,1,float16,fp8,0,1.16484268506368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,128,1,fp8,fp8,0,1.07806396484375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,float16,0,2.994762738545736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,float16,0,1.1637120246887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,0,1,float16,fp8,0,3.0034399032592773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,64,0,1,fp8,fp8,0,2.617173353830973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,128,1,float16,fp8,0,1.1738080183664958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,128,1,fp8,fp8,0,1.0951146284739177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,float16,0,1.1735040346781414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,float16,0,3.0056212743123374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,0,1,float16,fp8,0,3.0117012659708657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,64,0,1,fp8,fp8,0,2.635786692301432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,128,1,float16,fp8,0,1.1853546301523845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,128,1,fp8,fp8,0,1.1063466866811116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,float16,0,3.0217758814493814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,float16,0,1.1937920252482097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,0,1,float16,fp8,0,3.03330135345459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,64,0,1,fp8,fp8,0,2.6516745885213218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,128,1,float16,fp8,0,1.20796799659729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,128,1,fp8,fp8,0,1.1363253593444824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,float16,0,3.051189422607422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,float16,0,0.6827147006988525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,0,1,float16,fp8,0,3.0646985371907554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,128,1,float16,fp8,0,0.6995573043823242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,64,0,1,fp8,fp8,0,2.6800851821899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,float16,0,1.645290692647298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,128,1,fp8,fp8,0,0.6647146542867025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,float16,0,0.5975519816080729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,0,1,float16,fp8,0,1.6614185969034831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,64,0,1,fp8,fp8,0,1.4645867347717285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,128,1,float16,fp8,0,0.6036693255106608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,float16,0,1.5323360761006672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,128,1,fp8,fp8,0,0.5632266600926717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,float16,0,0.6029386520385742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,0,1,float16,fp8,0,1.536629358927409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,64,0,1,fp8,fp8,0,1.349295934041341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,128,1,float16,fp8,0,0.609823981920878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,float16,0,1.5366719563802083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,128,1,fp8,fp8,0,0.5683840115865072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,float16,0,0.6067839860916138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,0,1,float16,fp8,0,1.5463253657023113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,64,0,1,fp8,fp8,0,1.3545546531677246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,128,1,float16,fp8,0,0.6153386831283569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,float16,0,1.5436159769694011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,128,1,fp8,fp8,0,0.5761440197626749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,float16,0,0.6179999907811483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,0,1,float16,fp8,0,1.554309368133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,64,0,1,fp8,fp8,0,1.3606452941894531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,128,1,float16,fp8,0,0.6270986795425415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,float16,0,1.561738650004069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,128,1,fp8,fp8,0,0.5883573293685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,float16,0,0.36588799953460693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,0,1,float16,fp8,0,1.5694773991902669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,64,0,1,fp8,fp8,0,1.3733332951863606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,float16,0,0.8610026836395264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,128,1,float16,fp8,0,0.3754826784133911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,128,1,fp8,fp8,0,0.35946667194366455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,float16,0,0.3224853277206421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,0,1,fp8,fp8,0,0.7722773551940918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,64,0,1,float16,fp8,0,0.8707413673400879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,128,1,float16,fp8,0,0.3253973325093587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,float16,0,0.8031840324401855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,128,1,fp8,fp8,0,0.31010133028030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,float16,0,0.32387733459472656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,0,1,float16,fp8,0,0.8058986663818359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,64,0,1,fp8,fp8,0,0.7140906651814779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,float16,0,0.80731733640035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,128,1,fp8,fp8,0,0.3105600078900655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,128,1,float16,fp8,0,0.32838932673136395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,float16,0,0.32757333914438885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,0,1,float16,fp8,0,0.8102560043334961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,64,0,1,fp8,fp8,0,0.717029333114624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,float16,0,0.8122186660766602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,128,1,float16,fp8,0,0.33186666170756024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,128,1,fp8,fp8,0,0.314522663752238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,float16,0,0.33455999692281085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,0,1,float16,fp8,0,0.8156800270080566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,64,0,1,fp8,fp8,0,0.720192035039266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,float16,0,0.8190879821777344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,128,1,float16,fp8,0,0.3405119975407918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,128,1,fp8,fp8,0,0.3208746711413066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,float16,0,0.21142399311065674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,0,1,float16,fp8,0,0.824186642964681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,128,1,float16,fp8,0,0.21659199396769205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,float16,0,0.4719039996465047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,64,0,1,fp8,fp8,0,0.7289120356241862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,128,1,fp8,fp8,0,0.20778665939966837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,0,1,float16,fp8,0,0.47862398624420166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,64,0,1,fp8,fp8,0,0.4289493163426717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,float16,0,0.18322134017944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,128,1,float16,fp8,0,0.1856373349825541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,128,1,fp8,fp8,0,0.18136000633239746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,float16,0,0.4392213424046834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,float16,0,0.18552533785502115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,0,1,fp8,fp8,0,0.39582931995391846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,64,0,1,float16,fp8,0,0.43939733505249023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,128,1,float16,fp8,0,0.18687466780344644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,128,1,fp8,fp8,0,0.18143999576568604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,float16,0,0.4413439830144246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,float16,0,0.18689066171646118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,0,1,float16,fp8,0,0.44148798783620197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,64,0,1,fp8,fp8,0,0.39477332433064777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,128,1,float16,fp8,0,0.18940800428390503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,128,1,fp8,fp8,0,0.18353599309921265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,float16,0,0.4423733154932658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,float16,0,0.19146132469177246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,0,1,fp8,fp8,0,0.39853866895039874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,64,0,1,float16,fp8,0,0.4444640080134074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,128,1,fp8,fp8,0,0.18917866547902426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,128,1,float16,fp8,0,0.194650669892629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,float16,0,0.447434663772583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,float16,0,0.15236799915631613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,0,1,fp8,fp8,0,0.4047146638234456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,64,0,1,float16,fp8,0,0.4506880044937134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,float16,0,0.2978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,128,1,fp8,fp8,0,0.1458453337351481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,128,1,float16,fp8,0,0.15057599544525146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,float16,0,0.14851199587186178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,0,1,float16,fp8,0,0.2958453297615051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,64,0,1,fp8,fp8,0,0.26740266879399616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,128,1,float16,fp8,0,0.14848533272743225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,128,1,fp8,fp8,0,0.14239466190338135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,float16,0,0.29063467184702557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,float16,0,0.14873600006103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,0,1,fp8,fp8,0,0.2608533302942912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,64,0,1,float16,fp8,0,0.2905706763267517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,128,1,float16,fp8,0,0.14832533399264017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,128,1,fp8,fp8,0,0.14331733187039694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,float16,0,0.29157867034276325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,0,1,fp8,fp8,0,0.2612373431523641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,64,0,1,float16,fp8,0,0.2909653385480245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,float16,0,0.14964800079663595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,128,1,float16,fp8,0,0.14854933818181357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,128,1,fp8,fp8,0,0.1425279974937439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,float16,0,0.2916640043258667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,0,1,float16,fp8,0,0.2902560035387675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,float16,0,0.15067733327547708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,64,0,1,fp8,fp8,0,0.2608319918314616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,float16,0,0.2915733257929484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,128,1,float16,fp8,0,0.14853866895039877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,128,1,fp8,fp8,0,0.1425386667251587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,0,1,float16,fp8,0,0.2897973259290059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,64,0,1,fp8,fp8,0,0.2633226712544759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,float16,0,1.5233386357625325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,128,1,float16,fp8,0,1.5364640553792317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,128,1,fp8,fp8,0,1.4267946879069011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,float16,0,3.055818557739258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,float16,0,1.5441546440124512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,0,1,fp8,fp8,0,2.705930709838867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,64,0,1,float16,fp8,0,3.073952039082845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,128,1,float16,fp8,0,1.556656042734782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,float16,0,3.0800746281941733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,128,1,fp8,fp8,0,1.45250670115153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,0,1,float16,fp8,0,3.0994186401367188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,float16,0,1.559007962544759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,64,0,1,fp8,fp8,0,2.7302773793538413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,float16,0,3.105440139770508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,128,1,fp8,fp8,0,1.4707999229431152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,128,1,float16,fp8,0,1.5745546023050945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,float16,0,1.5911787350972493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,0,1,float16,fp8,0,3.1169865926106772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,64,0,1,fp8,fp8,0,2.75324281056722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,float16,0,3.1482772827148438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,128,1,fp8,fp8,0,1.5085919698079426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,128,1,float16,fp8,0,1.6035200754801433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,float16,0,0.8829120000203451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,float16,0,1.7060799598693848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,128,1,float16,fp8,0,0.9031360149383545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,0,1,float16,fp8,0,3.1606079737345376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,64,0,1,fp8,fp8,0,2.798960049947103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,128,1,fp8,fp8,0,0.8598879973093668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,float16,0,0.7705600261688232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,0,1,fp8,fp8,0,1.5314559936523438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,64,0,1,float16,fp8,0,1.7279574076334636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,128,1,float16,fp8,0,0.7774666945139567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,float16,0,1.5460693041483562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,128,1,fp8,fp8,0,0.7224799791971842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,float16,0,0.7762933572133383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,0,1,float16,fp8,0,1.5552639961242676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,64,0,1,fp8,fp8,0,1.3721760114034016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,128,1,float16,fp8,0,0.7840480009714762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,float16,0,1.555184046427409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,128,1,fp8,fp8,0,0.7291359901428223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,float16,0,0.7832746505737305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,0,1,float16,fp8,0,1.5648852984110515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,64,0,1,fp8,fp8,0,1.377914587656657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,128,1,float16,fp8,0,0.7911039988199869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,128,1,fp8,fp8,0,0.7369493643442789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,float16,0,1.5654880205790203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,float16,0,0.7976426283518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,0,1,float16,fp8,0,1.5743145942687988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,64,0,1,fp8,fp8,0,1.3863840103149414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,128,1,fp8,fp8,0,0.7550133069356283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,128,1,float16,fp8,0,0.8065600395202637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,float16,0,1.5841174125671387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,float16,0,0.46005332469940186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,0,1,fp8,fp8,0,1.4081013997395833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,64,0,1,float16,fp8,0,1.5935039520263672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,128,1,float16,fp8,0,0.47222399711608887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,float16,0,0.8802399635314941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,128,1,fp8,fp8,0,0.45023465156555176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,0,1,float16,fp8,0,0.8914453188578287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,float16,0,0.40142401059468585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,64,0,1,fp8,fp8,0,0.7942453225453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,128,1,float16,fp8,0,0.4062933524449666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,128,1,fp8,fp8,0,0.3816746473312378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,float16,0,0.8025546868642172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,float16,0,0.4054826498031616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,0,1,fp8,fp8,0,0.7143786748250326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,64,0,1,float16,fp8,0,0.8062506516774496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,float16,0,0.8071093559265137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,128,1,float16,fp8,0,0.41045331954956055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,128,1,fp8,fp8,0,0.38652801513671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,float16,0,0.4100266695022583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,0,1,float16,fp8,0,0.8099733193715414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,64,0,1,fp8,fp8,0,0.7194080352783203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,float16,0,0.8103360335032145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,128,1,float16,fp8,0,0.4148373206456502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,128,1,fp8,fp8,0,0.3901120026906331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,0,1,float16,fp8,0,0.814842700958252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,float16,0,0.41627732912699383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,64,0,1,fp8,fp8,0,0.7221866448720297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,float16,0,0.8203732967376709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,128,1,float16,fp8,0,0.42156799634297687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,128,1,fp8,fp8,0,0.3965493440628052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,float16,0,0.24938666820526123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,float16,0,0.4678506851196289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,0,1,float16,fp8,0,0.824293295542399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,64,0,1,fp8,fp8,0,0.7331146399180094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,128,1,float16,fp8,0,0.2563733259836833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,128,1,fp8,fp8,0,0.24644800027211508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,float16,0,0.2162239948908488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,0,1,float16,fp8,0,0.47421332200368244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,64,0,1,fp8,fp8,0,0.425983985265096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,float16,0,0.42501866817474365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,128,1,float16,fp8,0,0.2187733252843221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,128,1,fp8,fp8,0,0.20982933044433594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,0,1,float16,fp8,0,0.4275519847869873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,float16,0,0.21788267294565836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,64,0,1,fp8,fp8,0,0.3857333262761434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,float16,0,0.4273279905319214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,128,1,fp8,fp8,0,0.2124533255894979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,128,1,float16,fp8,0,0.22075732549031576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,0,1,float16,fp8,0,0.4294133186340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,64,0,1,fp8,fp8,0,0.3867679834365845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,float16,0,0.22121065855026245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,float16,0,0.43134931723276776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,128,1,fp8,fp8,0,0.21607999006907144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,128,1,float16,fp8,0,0.22350400686264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,0,1,float16,fp8,0,0.4333440065383911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,float16,0,0.22708266973495483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,64,0,1,fp8,fp8,0,0.39044801394144696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,float16,0,0.43714133898417157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,128,1,float16,fp8,0,0.23097066084543863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,128,1,fp8,fp8,0,0.21991467475891113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,float16,0,0.14434132973353067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,0,1,float16,fp8,0,0.44120534261067706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,64,0,1,fp8,fp8,0,0.39632534980773926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,float16,0,0.26295467217763263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,128,1,float16,fp8,0,0.14669332901636759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,128,1,fp8,fp8,0,0.14481066664059958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,float16,0,0.12383466958999634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,0,1,float16,fp8,0,0.26708267132441205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,64,0,1,fp8,fp8,0,0.2432639996210734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,float16,0,0.24085867404937744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,128,1,float16,fp8,0,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,128,1,fp8,fp8,0,0.11983999609947205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,0,1,float16,fp8,0,0.24075732628504434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,float16,0,0.12401066223780315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,64,0,1,fp8,fp8,0,0.21490132808685303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,float16,0,0.24071999390920004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,128,1,float16,fp8,0,0.1256160040696462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,128,1,fp8,fp8,0,0.11956266562143962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,0,1,float16,fp8,0,0.24077334006627402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,64,0,1,fp8,fp8,0,0.21572266022364298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,float16,0,0.12587733070055643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,float16,0,0.24224533637364706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,128,1,fp8,fp8,0,0.12192533413569133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,128,1,float16,fp8,0,0.12617066502571106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,0,1,float16,fp8,0,0.24198933442433676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,64,0,1,fp8,fp8,0,0.21970667441685995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,float16,0,0.12794666488965353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,128,1,float16,fp8,0,0.1299626628557841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,float16,0,0.2439146637916565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,128,1,fp8,fp8,0,0.1279039978981018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,0,1,float16,fp8,0,0.24451732635498047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,float16,0,0.10443733135859172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,64,0,1,fp8,fp8,0,0.22484799226125082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,float16,0,0.17229332526524863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,128,1,float16,fp8,0,0.10337600111961365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,128,1,fp8,fp8,0,0.10337600111961365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,0,1,float16,fp8,0,0.17187732458114624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,64,0,1,fp8,fp8,0,0.15734933813412985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,float16,0,0.10340799887975057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,float16,0,0.16961065928141275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,128,1,float16,fp8,0,0.10335999727249146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,128,1,fp8,fp8,0,0.10121066371599834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,0,1,float16,fp8,0,0.16979199647903442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,64,0,1,fp8,fp8,0,0.15625066558519998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,float16,0,0.10366933544476827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,128,1,float16,fp8,0,0.10455466310183208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,float16,0,0.170906662940979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,128,1,fp8,fp8,0,0.0995199978351593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,0,1,float16,fp8,0,0.17087467511494955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,64,0,1,fp8,fp8,0,0.15643733739852905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,float16,0,0.10339732964833577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,128,1,float16,fp8,0,0.10475732882817586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,float16,0,0.17096000909805298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,128,1,fp8,fp8,0,0.10160533587137859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,0,1,float16,fp8,0,0.17107733090718588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,64,0,1,fp8,fp8,0,0.15666666626930237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,float16,0,0.10371200243631999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,128,1,float16,fp8,0,0.1048959990342458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,float16,0,0.17017066478729248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,128,1,fp8,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,0,1,float16,fp8,0,0.17115734020868936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,64,0,1,fp8,fp8,0,0.15450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,float16,0,1.1357279618581135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,128,1,float16,fp8,0,1.143887996673584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,float16,0,1.9553546905517578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,128,1,fp8,fp8,0,1.060821294784546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,0,1,float16,fp8,0,1.9603254000345867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,float16,0,1.1468640168507893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,64,0,1,fp8,fp8,0,1.735962708791097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,128,1,float16,fp8,0,1.1577599843343098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,128,1,fp8,fp8,0,1.0767199993133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,float16,0,1.962389310201009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,0,1,float16,fp8,0,1.9757760365804036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,float16,0,1.1593120098114014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,64,0,1,fp8,fp8,0,1.7531946500142415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,128,1,float16,fp8,0,1.1677652994791667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,128,1,fp8,fp8,0,1.0917387008666992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,float16,0,1.9759947458902996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,0,1,float16,fp8,0,1.9879466692606609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,float16,0,1.1805280049641926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,64,0,1,fp8,fp8,0,1.7677280108133953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,float16,0,2.0027146339416504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,128,1,float16,fp8,0,1.192207972208659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,128,1,fp8,fp8,0,1.1192906697591145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,float16,0,0.6664533217748007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,0,1,float16,fp8,0,2.0129920641581216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,float16,0,1.1016213099161785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,128,1,float16,fp8,0,0.6810719966888428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,64,0,1,fp8,fp8,0,1.7989546457926433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,128,1,fp8,fp8,0,0.6483840147654215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,float16,0,0.579584002494812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,0,1,float16,fp8,0,1.1176160176595051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,64,0,1,fp8,fp8,0,1.0062452952067058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,float16,0,0.9946293036142985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,128,1,float16,fp8,0,0.5850826501846313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,128,1,fp8,fp8,0,0.5459733406702677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,0,1,float16,fp8,0,1.000864028930664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,float16,0,0.585375984509786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,64,0,1,fp8,fp8,0,0.8902453581492106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,float16,0,1.001029332478841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,128,1,float16,fp8,0,0.5910400152206421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,128,1,fp8,fp8,0,0.5517066717147827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,0,1,float16,fp8,0,1.0057066281636555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,float16,0,0.5904426574707031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,64,0,1,fp8,fp8,0,0.8963413238525391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,float16,0,1.007482687632243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,128,1,float16,fp8,0,0.5971466700236002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,128,1,fp8,fp8,0,0.5579946835835775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,float16,0,0.6021120150883993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,0,1,float16,fp8,0,1.0132319927215576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,64,0,1,fp8,fp8,0,0.9033973217010498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,float16,0,1.0206080277760823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,128,1,float16,fp8,0,0.6108906666437784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,128,1,fp8,fp8,0,0.5705600182215372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,float16,0,0.34989333152770996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,0,1,float16,fp8,0,1.0272693634033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,64,0,1,fp8,fp8,0,0.9194026788075765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,float16,0,0.5756426652272543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,128,1,float16,fp8,0,0.35785067081451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,128,1,fp8,fp8,0,0.3410400152206421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,0,1,float16,fp8,0,0.5848639806111654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,64,0,1,fp8,fp8,0,0.5279200077056885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,float16,0,0.3039733370145162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,float16,0,0.5177706480026245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,128,1,float16,fp8,0,0.30801600217819214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,128,1,fp8,fp8,0,0.2923733393351237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,0,1,float16,fp8,0,0.5207093159357706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,64,0,1,fp8,fp8,0,0.4696693420410156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,float16,0,0.30712000528971356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,float16,0,0.5212533473968506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,128,1,float16,fp8,0,0.3107093373934428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,128,1,fp8,fp8,0,0.29398399591445923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,0,1,fp8,fp8,0,0.47408000628153485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,64,0,1,float16,fp8,0,0.5235626697540283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,float16,0,0.3103040059407552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,128,1,fp8,fp8,0,0.29634666442871094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,128,1,float16,fp8,0,0.31466132402420044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,float16,0,0.5258346796035767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,float16,0,0.31749866406122845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,0,1,fp8,fp8,0,0.47707200050354004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,64,0,1,float16,fp8,0,0.528218666712443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,128,1,fp8,fp8,0,0.3043359915415446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,128,1,float16,fp8,0,0.321343998114268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,float16,0,0.532256007194519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,float16,0,0.19138665994008383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,0,1,fp8,fp8,0,0.48287467161814374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,64,0,1,float16,fp8,0,0.5367146730422974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,float16,0,0.3109760085741679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,128,1,fp8,fp8,0,0.18985599279403687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,128,1,float16,fp8,0,0.19723733266194662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,float16,0,0.1641813317934672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,0,1,float16,fp8,0,0.31594665845235187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,64,0,1,fp8,fp8,0,0.2876746654510498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,128,1,float16,fp8,0,0.16662933429082236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,128,1,fp8,fp8,0,0.16263467073440552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,float16,0,0.2778453429539998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,0,1,float16,fp8,0,0.27963199218114215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,64,0,1,fp8,fp8,0,0.25673067569732666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,float16,0,0.1653386652469635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,128,1,float16,fp8,0,0.16801599661509195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,128,1,fp8,fp8,0,0.16273599863052368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,float16,0,0.2778826753298442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,float16,0,0.16696532567342123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,0,1,float16,fp8,0,0.2815893292427063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,64,0,1,fp8,fp8,0,0.2573546568552653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,128,1,float16,fp8,0,0.16923733552296957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,float16,0,0.28151466449101764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,128,1,fp8,fp8,0,0.16657599806785583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,0,1,float16,fp8,0,0.2828800082206726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,float16,0,0.17188799381256104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,64,0,1,fp8,fp8,0,0.2604159911473592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,128,1,float16,fp8,0,0.17457600434621176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,128,1,fp8,fp8,0,0.17064533631006876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,float16,0,0.28623465696970624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,float16,0,0.1125866671403249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,0,1,float16,fp8,0,0.29046932856241864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,float16,0,0.17749333381652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,64,0,1,fp8,fp8,0,0.2650773326555888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,128,1,float16,fp8,0,0.11370666821797688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,128,1,fp8,fp8,0,0.11427733302116394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,0,1,float16,fp8,0,0.18092266718546549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,64,0,1,fp8,fp8,0,0.16714666287104288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,float16,0,0.0976693332195282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,128,1,float16,fp8,0,0.0990826686223348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,float16,0,0.16220800081888834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,128,1,fp8,fp8,0,0.09339732925097148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,0,1,float16,fp8,0,0.16276266177495322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,float16,0,0.09850133458773296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,64,0,1,fp8,fp8,0,0.14642133315404257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,128,1,float16,fp8,0,0.09917333722114563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,float16,0,0.16289599736531576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,128,1,fp8,fp8,0,0.09332266449928284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,0,1,float16,fp8,0,0.16291733582814535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,64,0,1,fp8,fp8,0,0.14661866426467896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,float16,0,0.0974079966545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,float16,0,0.16263999541600546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,128,1,float16,fp8,0,0.09912533561388652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,128,1,fp8,fp8,0,0.09492799639701843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,0,1,float16,fp8,0,0.1630346675713857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,float16,0,0.0995306670665741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,64,0,1,fp8,fp8,0,0.14855466286341348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,float16,0,0.16451733311017355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,128,1,float16,fp8,0,0.10161067048708598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,128,1,fp8,fp8,0,0.09713600079218547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,0,1,float16,fp8,0,0.1666826605796814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,64,0,1,fp8,fp8,0,0.15038399895032248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,float16,0,0.08109333117802937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,float16,0,0.12025599678357442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,128,1,float16,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,128,1,fp8,fp8,0,0.07868266602357228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,0,1,float16,fp8,0,0.11991999546686809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,64,0,1,fp8,fp8,0,0.1097813347975413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,float16,0,0.08257066706816356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,float16,0,0.11958932876586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,128,1,float16,fp8,0,0.08106666803359985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,128,1,fp8,fp8,0,0.07855999966462453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,0,1,float16,fp8,0,0.11983467141787212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,64,0,1,fp8,fp8,0,0.11123733719189961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,float16,0,0.08146133522192638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,float16,0,0.11962133646011353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,128,1,float16,fp8,0,0.08066666622956593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,128,1,fp8,fp8,0,0.07838400204976399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,0,1,float16,fp8,0,0.11983999609947205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,64,0,1,fp8,fp8,0,0.10960533221562703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,float16,0,0.08239999910195668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,float16,0,0.11972799897193909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,128,1,fp8,fp8,0,0.07893866797288258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,128,1,float16,fp8,0,0.08124266564846039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,0,1,float16,fp8,0,0.12165333827336629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,64,0,1,fp8,fp8,0,0.11044266819953918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,float16,0,0.08184533317883809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,float16,0,0.11987732847531636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,128,1,float16,fp8,0,0.08067733546098073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,128,1,fp8,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,0,1,float16,fp8,0,0.1200266679128011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,64,0,1,fp8,fp8,0,0.10960533221562703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,float16,0,1.5245812733968098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,128,1,float16,fp8,0,1.5315146446228027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,float16,0,2.1614452997843423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,128,1,fp8,fp8,0,1.4071413675944011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,0,1,float16,fp8,0,2.1626292864481607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,64,0,1,fp8,fp8,0,1.921573321024577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,float16,0,1.5432213147481282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,128,1,float16,fp8,0,1.5509707132975261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,float16,0,2.172272046407064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,128,1,fp8,fp8,0,1.4214080174763997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,0,1,fp8,fp8,0,1.9336320559183757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,64,0,1,float16,fp8,0,2.177194595336914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,float16,0,1.5803732872009277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,float16,0,2.2101866404215493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,128,1,fp8,fp8,0,1.4443626403808594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,128,1,float16,fp8,0,1.573306719462077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,0,1,float16,fp8,0,2.204106648763021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,64,0,1,fp8,fp8,0,1.9596479733784993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,float16,0,1.611013412475586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,float16,0,2.2505067189534507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,128,1,fp8,fp8,0,1.4741600354512532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,128,1,float16,fp8,0,1.6018667221069336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,float16,0,0.8746399879455566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,0,1,float16,fp8,0,2.2371573448181152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,float16,0,1.2205866972605388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,64,0,1,fp8,fp8,0,1.9937280019124348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,128,1,float16,fp8,0,0.8905920187632242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,128,1,fp8,fp8,0,0.8491626580556234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,0,1,float16,fp8,0,1.234613339106242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,float16,0,0.7582826614379883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,64,0,1,fp8,fp8,0,1.1311733722686768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,float16,0,1.0757866700490315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,128,1,float16,fp8,0,0.7663413683573405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,128,1,fp8,fp8,0,0.7119680245717367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,0,1,float16,fp8,0,1.083066701889038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,64,0,1,fp8,fp8,0,0.970240036646525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,float16,0,0.7662560145060221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,float16,0,1.0847466786702473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,128,1,float16,fp8,0,0.77402130762736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,128,1,fp8,fp8,0,0.7196853160858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,0,1,float16,fp8,0,1.0917387008666992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,float16,0,0.7741440137227377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,64,0,1,fp8,fp8,0,0.9797493616739908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,float16,0,1.0950133005777996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,128,1,float16,fp8,0,0.7818079789479574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,128,1,fp8,fp8,0,0.7283093134562174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,0,1,float16,fp8,0,1.1022133032480876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,float16,0,0.7864533265431722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,64,0,1,fp8,fp8,0,0.9882187048594157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,float16,0,1.109877347946167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,128,1,float16,fp8,0,0.7962720394134521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,128,1,fp8,fp8,0,0.7435572942097982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,float16,0,0.45017067591349286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,0,1,float16,fp8,0,1.1174773375193279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,64,0,1,fp8,fp8,0,1.0077706972757976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,128,1,float16,fp8,0,0.46003198623657227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,float16,0,0.6266986529032389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,128,1,fp8,fp8,0,0.4392160177230835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,0,1,float16,fp8,0,0.6367253462473551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,64,0,1,fp8,fp8,0,0.5819093386332194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,float16,0,0.3901919921239217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,float16,0,0.5536799828211466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,128,1,float16,fp8,0,0.3942986726760864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,128,1,fp8,fp8,0,0.37007999420166016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,0,1,float16,fp8,0,0.5585386753082275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,64,0,1,fp8,fp8,0,0.5044000148773193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,float16,0,0.3948959906895955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,float16,0,0.5582026640574137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,128,1,float16,fp8,0,0.39895466963450116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,128,1,fp8,fp8,0,0.37563733259836835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,0,1,float16,fp8,0,0.562549352645874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,64,0,1,fp8,fp8,0,0.5092373291651408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,float16,0,0.3990186850229899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,float16,0,0.5627839962641398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,128,1,float16,fp8,0,0.40435731410980225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,128,1,fp8,fp8,0,0.3778826793034871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,0,1,float16,fp8,0,0.5679680109024048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,float16,0,0.40670935312906903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,64,0,1,fp8,fp8,0,0.5126773516337076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,float16,0,0.5720800161361694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,128,1,float16,fp8,0,0.41208000977834064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,128,1,fp8,fp8,0,0.3855839967727661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,0,1,float16,fp8,0,0.5761919816335043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,float16,0,0.23967466751734415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,float16,0,0.33110400040944415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,64,0,1,fp8,fp8,0,0.5230079889297485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,128,1,float16,fp8,0,0.24451732635498047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,128,1,fp8,fp8,0,0.2352799971898397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,0,1,fp8,fp8,0,0.31084267298380536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,64,0,1,float16,fp8,0,0.3375306526819865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,float16,0,0.20354666312535605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,float16,0,0.2900906602541606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,128,1,fp8,fp8,0,0.1979680061340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,128,1,float16,fp8,0,0.2055520017941793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,0,1,float16,fp8,0,0.2927466630935669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,float16,0,0.20449066162109375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,64,0,1,fp8,fp8,0,0.2698613405227661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,float16,0,0.29286400477091473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,128,1,float16,fp8,0,0.20821332931518555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,128,1,fp8,fp8,0,0.2015626629193624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,0,1,float16,fp8,0,0.29497067133585614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,float16,0,0.2085226575533549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,64,0,1,fp8,fp8,0,0.27139200766881305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,float16,0,0.2955999970436096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,128,1,float16,fp8,0,0.21195733547210693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,128,1,fp8,fp8,0,0.20382400353749594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,float16,0,0.21609065930048624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,0,1,float16,fp8,0,0.29798932870229083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,64,0,1,fp8,fp8,0,0.2754986683527629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,128,1,float16,fp8,0,0.2183893322944641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,128,1,fp8,fp8,0,0.20895467201868692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,float16,0,0.30105600754419964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,float16,0,0.13190399607022604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,0,1,fp8,fp8,0,0.2805866599082947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,64,0,1,float16,fp8,0,0.3060906728108724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,float16,0,0.18317866325378418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,128,1,float16,fp8,0,0.13590400417645773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,128,1,fp8,fp8,0,0.13332266608874002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,0,1,float16,fp8,0,0.1869386633237203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,float16,0,0.11140799522399902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,64,0,1,fp8,fp8,0,0.17289066314697266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,float16,0,0.15918933351834616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,128,1,float16,fp8,0,0.11216533184051514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,128,1,fp8,fp8,0,0.10594666997591655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,0,1,float16,fp8,0,0.16085333625475565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,float16,0,0.11136533816655476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,64,0,1,fp8,fp8,0,0.14633599917093912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,128,1,float16,fp8,0,0.11181867122650146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,128,1,fp8,fp8,0,0.10930132865905762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,float16,0,0.15869333346684775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,0,1,float16,fp8,0,0.1609493295351664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,64,0,1,fp8,fp8,0,0.1481119990348816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,float16,0,0.11171733339627583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,128,1,fp8,fp8,0,0.11136000355084737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,128,1,float16,fp8,0,0.11545600493748982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,float16,0,0.16165866454442343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,float16,0,0.11575999855995178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,0,1,fp8,fp8,0,0.15030399958292642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,64,0,1,float16,fp8,0,0.1632266640663147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,128,1,float16,fp8,0,0.11753066380818684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,float16,0,0.16521599888801575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,128,1,fp8,fp8,0,0.11610666910807292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,float16,0,0.07675733168919881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,0,1,fp8,fp8,0,0.15567466616630554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,64,0,1,float16,fp8,0,0.16588800152142844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,float16,0,0.1034346620241801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,128,1,float16,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,128,1,fp8,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,0,1,float16,fp8,0,0.10523733496665955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,64,0,1,fp8,fp8,0,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,float16,0,0.070783997575442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,128,1,float16,fp8,0,0.07257066667079926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,128,1,fp8,fp8,0,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,float16,0,0.09885332981745402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,0,1,float16,fp8,0,0.09752532839775085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,float16,0,0.07236800094445546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,64,0,1,fp8,fp8,0,0.09212799866994222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,128,1,float16,fp8,0,0.07136000196139018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,128,1,fp8,fp8,0,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,fp8,0,0.0990826686223348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,0,1,float16,float16,0,0.09892800450325012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,float16,0,0.07234666744867961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,64,0,1,fp8,fp8,0,0.09095999598503113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,float16,0,0.09909333785374959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,128,1,float16,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,128,1,fp8,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,0,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,64,0,1,fp8,fp8,0,0.0912000040213267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,fp8,0,0.07463466624418895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,float16,0,0.0990666647752126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,128,1,float16,float16,0,0.07257600128650665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,128,1,fp8,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,0,1,float16,fp8,0,0.10109333197275798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,64,0,1,fp8,fp8,0,0.09136000275611877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,float16,0,0.0786186655362447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,float16,0,0.05885866781075796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,128,1,float16,fp8,0,0.05971199770768484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,128,1,fp8,fp8,0,0.05649599929650625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,0,1,float16,fp8,0,0.07898133496443431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,64,0,1,fp8,fp8,0,0.07374933362007141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,float16,0,0.059157331784566246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,float16,0,0.0784800002972285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,128,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,128,1,fp8,fp8,0,0.057114665706952415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,0,1,float16,fp8,0,0.0784693310658137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,64,0,1,fp8,fp8,0,0.07282666862010956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,128,1,float16,fp8,0,0.05813866853713989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,128,1,fp8,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,float16,0,0.07870399951934814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,0,1,float16,fp8,0,0.07868800063927968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,64,0,1,fp8,fp8,0,0.07235733171304067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,float16,0,0.058373332023620605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,128,1,float16,fp8,0,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,float16,0,0.0788800021012624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,128,1,fp8,fp8,0,0.05835733314355215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,0,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,64,0,1,fp8,fp8,0,0.0726986676454544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,float16,0,0.059338668982187905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,float16,0,0.07874666651089986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,128,1,float16,fp8,0,0.05830933153629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,128,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,0,1,float16,fp8,0,0.07901333272457123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,64,0,1,fp8,fp8,0,0.07306666672229767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,float16,0,1.141861359278361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,128,1,float16,fp8,0,1.1494613488515217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,float16,0,1.450522740681966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,128,1,fp8,fp8,0,1.0614293416341145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,0,1,fp8,fp8,0,1.3091359933217366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,64,0,1,float16,fp8,0,1.451050599416097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,float16,0,1.1678880055745442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,128,1,float16,fp8,0,1.1692746480305989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,float16,0,1.4717119534810383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,128,1,fp8,fp8,0,1.0779626369476318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,0,1,float16,fp8,0,1.4743040402730305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,64,0,1,fp8,fp8,0,1.3242026964823406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,float16,0,1.1805226802825928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,128,1,fp8,fp8,0,1.0910666783650715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,128,1,float16,fp8,0,1.1824053128560383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,float16,0,1.4858986536661785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,0,1,float16,fp8,0,1.4881332715352376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,64,0,1,fp8,fp8,0,1.3371413548787434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,float16,0,1.2038506666819255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,float16,0,1.5118826230367024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,128,1,float16,fp8,0,1.2017066478729248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,128,1,fp8,fp8,0,1.1158453623453777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,float16,0,0.6655840078989664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,0,1,float16,fp8,0,1.5153279304504395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,64,0,1,fp8,fp8,0,1.3643040657043457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,128,1,float16,fp8,0,0.6745440165201823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,float16,0,0.8380053043365479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,128,1,fp8,fp8,0,0.6425386667251587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,0,1,float16,fp8,0,0.8476746877034506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,float16,0,0.5728106498718262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,64,0,1,fp8,fp8,0,0.7782133420308431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,float16,0,0.7279520034790039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,128,1,fp8,fp8,0,0.5385760068893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,128,1,float16,fp8,0,0.5790239969889323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,0,1,float16,fp8,0,0.730778694152832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,64,0,1,fp8,fp8,0,0.6626186768213908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,float16,0,0.5810079971949259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,float16,0,0.7358187039693197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,128,1,float16,fp8,0,0.5869866609573364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,128,1,fp8,fp8,0,0.544922669728597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,0,1,float16,fp8,0,0.7409226894378662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,64,0,1,fp8,fp8,0,0.6691946983337402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,float16,0,0.5870186487833658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,float16,0,0.7435626983642578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,128,1,float16,fp8,0,0.5908053318659464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,128,1,fp8,fp8,0,0.5505226850509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,0,1,float16,fp8,0,0.746992031733195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,64,0,1,fp8,fp8,0,0.6789013544718424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,float16,0,0.5970079898834229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,float16,0,0.7536746660868326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,128,1,float16,fp8,0,0.6028800010681152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,128,1,fp8,fp8,0,0.5644746621449789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,float16,0,0.3436586856842041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,0,1,float16,fp8,0,0.7646400133768717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,float16,0,0.4330879847208659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,64,0,1,fp8,fp8,0,0.6922667026519775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,128,1,float16,fp8,0,0.35046398639678955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,128,1,fp8,fp8,0,0.33613868554433185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,0,1,float16,fp8,0,0.4405866861343384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,64,0,1,fp8,fp8,0,0.406490683555603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,float16,0,0.29471999406814575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,float16,0,0.3757866621017456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,128,1,float16,fp8,0,0.29919999837875366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,128,1,fp8,fp8,0,0.2815626660982768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,0,1,float16,fp8,0,0.3782080014546712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,64,0,1,fp8,fp8,0,0.3471946716308594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,float16,0,0.2964106599489848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,float16,0,0.3765866756439209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,128,1,float16,fp8,0,0.3001706600189209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,128,1,fp8,fp8,0,0.2866133252779643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,0,1,float16,fp8,0,0.38069331645965576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,64,0,1,fp8,fp8,0,0.3513866662979126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,float16,0,0.30347200234731037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,float16,0,0.38490132490793866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,128,1,float16,fp8,0,0.30608532826105755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,128,1,fp8,fp8,0,0.2896160085995992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,0,1,float16,fp8,0,0.38633068402608234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,float16,0,0.31005332867304486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,64,0,1,fp8,fp8,0,0.3546666701634725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,128,1,float16,fp8,0,0.3137493332227071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,float16,0,0.39217066764831543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,128,1,fp8,fp8,0,0.29580267270406085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,0,1,float16,fp8,0,0.396448016166687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,float16,0,0.18326934178670248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,float16,0,0.23094934225082397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,64,0,1,fp8,fp8,0,0.36156801382700604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,128,1,float16,fp8,0,0.18806399901707968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,128,1,fp8,fp8,0,0.18023467063903809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,float16,0,0.153221329053243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,0,1,fp8,fp8,0,0.21868266661961874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,64,0,1,float16,fp8,0,0.23504533370335898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,float16,0,0.1962453325589498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,128,1,float16,fp8,0,0.15591999888420105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,128,1,fp8,fp8,0,0.15242666999499002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,float16,0,0.15618667006492615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,0,1,float16,fp8,0,0.19981332619984946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,64,0,1,fp8,fp8,0,0.1874879995981852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,128,1,float16,fp8,0,0.15795200069745383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,128,1,fp8,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,float16,0,0.19926400979359946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,0,1,float16,fp8,0,0.20089600483576456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,64,0,1,fp8,fp8,0,0.18717867136001587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,float16,0,0.15873066584269205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,128,1,float16,fp8,0,0.1607093314329783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,128,1,fp8,fp8,0,0.15702933073043823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,float16,0,0.20174400011698404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,float16,0,0.16286399960517883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,0,1,fp8,fp8,0,0.19148266315460205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,64,0,1,float16,fp8,0,0.2036479910214742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,128,1,float16,fp8,0,0.16521066427230835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,128,1,fp8,fp8,0,0.16113066673278809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,float16,0,0.2071466644605001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,float16,0,0.10116799672444661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,0,1,fp8,fp8,0,0.19650665918986002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,64,0,1,float16,fp8,0,0.2089973290761312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,float16,0,0.12773332993189493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,128,1,float16,fp8,0,0.1037066678206126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,128,1,fp8,fp8,0,0.10363733768463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,0,1,float16,fp8,0,0.1309866706530253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,float16,0,0.08931199709574382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,64,0,1,fp8,fp8,0,0.1236853301525116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,float16,0,0.11193066835403442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,128,1,fp8,fp8,0,0.08480000495910645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,128,1,float16,fp8,0,0.08918933073679607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,0,1,float16,fp8,0,0.11379200220108032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,float16,0,0.08870399991671245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,64,0,1,fp8,fp8,0,0.10327466328938802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,float16,0,0.11326400438944499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,128,1,float16,fp8,0,0.08884800473848979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,128,1,fp8,fp8,0,0.08481599887212117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,float16,0,0.08886933326721191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,0,1,float16,fp8,0,0.11332266529401143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,64,0,1,fp8,fp8,0,0.10451733072598775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,128,1,float16,fp8,0,0.08960533142089844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,128,1,fp8,fp8,0,0.0849173367023468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,float16,0,0.11188266674677531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,0,1,float16,fp8,0,0.11449066797892253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,float16,0,0.08994133273760478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,64,0,1,fp8,fp8,0,0.10538132985432942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,128,1,float16,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,float16,0,0.11453333497047424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,128,1,fp8,fp8,0,0.0888320008913676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,float16,0,0.06039999922116598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,0,1,fp8,fp8,0,0.10584533214569092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,64,0,1,float16,fp8,0,0.11586133639017741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,float16,0,0.07654400169849396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,128,1,float16,fp8,0,0.061941335598627724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,128,1,fp8,fp8,0,0.060517330964406334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,0,1,float16,fp8,0,0.07828266421953838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,64,0,1,fp8,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,float16,0,0.05611200133959452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,128,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,128,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,float16,0,0.07252799967924754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,0,1,float16,fp8,0,0.07389333347479503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,64,0,1,fp8,fp8,0,0.0680266668399175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,float16,0,0.057802667220433555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,128,1,float16,fp8,0,0.05789333085219065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,128,1,fp8,fp8,0,0.05601066847642263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,float16,0,0.07285866638024648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,0,1,float16,fp8,0,0.07446399827798207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,float16,0,0.05762666463851929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,64,0,1,fp8,fp8,0,0.06839466591676076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,128,1,float16,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,float16,0,0.0729066679875056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,128,1,fp8,fp8,0,0.05579733351866404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,0,1,float16,fp8,0,0.07249066730340321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,64,0,1,fp8,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,float16,0,0.056661332647005715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,float16,0,0.07249600191911061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,128,1,float16,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,128,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,0,1,fp8,fp8,0,0.06830933193365733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,64,0,1,float16,fp8,0,0.0749120016892751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,float16,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,float16,0,0.05808533231417338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,128,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,0,1,float16,fp8,0,0.058778668443361916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,128,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,64,0,1,fp8,fp8,0,0.055919999877611794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,float16,0,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,float16,0,0.06014933188756307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,128,1,fp8,fp8,0,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,128,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,0,1,float16,fp8,0,0.05991999804973602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,64,0,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,float16,0,0.05940799911816915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,128,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,128,1,fp8,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,0,1,float16,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,64,0,1,fp8,fp8,0,0.05485333502292633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,float16,0,0.05009600023428599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,float16,0,0.05810666580994924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,128,1,float16,fp8,0,0.0496373325586319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,128,1,fp8,fp8,0,0.04771733283996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,0,1,float16,fp8,0,0.05851200222969055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,64,0,1,fp8,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,float16,0,0.049584001302719116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,128,1,float16,fp8,0,0.05035200218359629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,128,1,fp8,fp8,0,0.04822933177153269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,0,1,float16,fp8,0,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,64,0,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,float16,0,1.3820959726969402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,128,1,float16,fp8,0,1.3751254081726074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,float16,0,1.566325346628825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,128,1,fp8,fp8,0,1.3286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,0,1,fp8,fp8,0,1.4745814005533855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,64,0,1,float16,fp8,0,1.560149351755778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,float16,0,1.3942559560139973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,128,1,float16,fp8,0,1.3919626871744792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,float16,0,1.5780480702718098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,128,1,fp8,fp8,0,1.3221279780069988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,0,1,float16,fp8,0,1.5815040270487468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,float16,0,1.426586627960205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,64,0,1,fp8,fp8,0,1.461664040883382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,float16,0,1.6217600504557292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,128,1,float16,fp8,0,1.4288053512573242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,128,1,fp8,fp8,0,1.5419680277506511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,0,1,float16,fp8,0,1.6192320187886555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,64,0,1,fp8,fp8,0,1.6857120196024578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,float16,0,1.411824067433675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,float16,0,1.6072266896565754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,128,1,float16,fp8,0,1.3830827077229817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,float16,0,0.7258506615956625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,128,1,fp8,fp8,0,1.5278666814168294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,0,1,float16,fp8,0,1.5858346621195476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,64,0,1,fp8,fp8,0,1.6677066485087078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,128,1,float16,fp8,0,0.7091946601867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,128,1,fp8,fp8,0,0.7471093336741129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,float16,0,0.8363306522369385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,0,1,float16,fp8,0,0.8239680131276449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,float16,0,0.7007733186086019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,64,0,1,fp8,fp8,0,0.833130677541097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,float16,0,0.7946613629659017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,128,1,float16,fp8,0,0.6940053304036459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,128,1,fp8,fp8,0,0.6574826637903849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,0,1,float16,fp8,0,0.7898773352305094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,64,0,1,fp8,fp8,0,0.729535977045695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,float16,0,0.7046826680501302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,float16,0,0.8033013343811035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,128,1,fp8,fp8,0,0.6588213443756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,128,1,float16,fp8,0,0.7029919624328613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,0,1,float16,fp8,0,0.7974452972412109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,64,0,1,fp8,fp8,0,0.73362135887146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,float16,0,0.7187679608662924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,float16,0,0.8146986961364746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,128,1,float16,fp8,0,0.7129279772440592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,128,1,fp8,fp8,0,0.7588693300882975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,0,1,float16,fp8,0,0.8116426467895508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,64,0,1,fp8,fp8,0,0.8293440341949463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,float16,0,0.7040586471557617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,float16,0,0.802560011545817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,128,1,float16,fp8,0,0.6964320341746012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,128,1,fp8,fp8,0,0.7554666996002197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,0,1,float16,fp8,0,0.7959413528442383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,float16,0,0.3722826639811198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,float16,0,0.429637352625529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,128,1,float16,fp8,0,0.36508798599243164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,64,0,1,fp8,fp8,0,0.8340906302134196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,128,1,fp8,fp8,0,0.3779040177663167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,0,1,float16,fp8,0,0.4222453435262044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,64,0,1,fp8,fp8,0,0.42346668243408203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,float16,0,0.3586239814758301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,float16,0,0.40750400225321454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,128,1,float16,fp8,0,0.3556319872538249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,128,1,fp8,fp8,0,0.338917334874471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,0,1,float16,fp8,0,0.4055200020472209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,64,0,1,fp8,fp8,0,0.37495466073354083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,float16,0,0.3630613485972087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,float16,0,0.4124159812927246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,128,1,float16,fp8,0,0.36143465836842853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,128,1,fp8,fp8,0,0.33873601754506427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,0,1,float16,fp8,0,0.4103039900461833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,64,0,1,fp8,fp8,0,0.37565867106119794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,float16,0,0.3691573143005371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,float16,0,0.4190400044123332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,128,1,float16,fp8,0,0.3665813207626343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,128,1,fp8,fp8,0,0.3676266670227051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,0,1,float16,fp8,0,0.4169173240661621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,64,0,1,fp8,fp8,0,0.40697598457336426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,float16,0,0.360485315322876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,float16,0,0.4123573303222656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,128,1,float16,fp8,0,0.357589324315389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,128,1,fp8,fp8,0,0.36987733840942383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,float16,0,0.1965279976526896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,0,1,float16,fp8,0,0.4082719882329305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,64,0,1,fp8,fp8,0,0.4018719991048177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,128,1,float16,fp8,0,0.193231999874115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,float16,0,0.22659732898076376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,128,1,fp8,fp8,0,0.20094933112462363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,0,1,float16,fp8,0,0.22542933622996011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,64,0,1,fp8,fp8,0,0.22331732511520386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,float16,0,0.1880693236986796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,float16,0,0.2140106757481893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,128,1,float16,fp8,0,0.187226672967275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,128,1,fp8,fp8,0,0.17730132738749185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,0,1,float16,fp8,0,0.21171200275421143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,64,0,1,fp8,fp8,0,0.1981333295504252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,float16,0,0.18900799751281738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,float16,0,0.21606934070587158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,128,1,float16,fp8,0,0.18977065881093344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,128,1,fp8,fp8,0,0.1779573361078898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,0,1,float16,fp8,0,0.21556266148885092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,64,0,1,fp8,fp8,0,0.19818133115768433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,float16,0,0.19378133614857992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,float16,0,0.2211093306541443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,128,1,float16,fp8,0,0.19221333662668863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,128,1,fp8,fp8,0,0.19035200277964273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,0,1,float16,fp8,0,0.21920533974965414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,64,0,1,fp8,fp8,0,0.2086826761563619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,float16,0,0.1901280085245768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,float16,0,0.21639466285705566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,128,1,float16,fp8,0,0.18763200441996256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,128,1,fp8,fp8,0,0.18684266010920206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,float16,0,0.10878400007883708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,0,1,float16,fp8,0,0.2161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,64,0,1,fp8,fp8,0,0.2109760046005249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,float16,0,0.12574399511019388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,128,1,float16,fp8,0,0.1076746682325999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,128,1,fp8,fp8,0,0.11072533329327901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,0,1,float16,fp8,0,0.1237546702226003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,64,0,1,fp8,fp8,0,0.12308266758918762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,float16,0,0.10146133104960124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,float16,0,0.11541866262753804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,128,1,float16,fp8,0,0.1009386678536733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,128,1,fp8,fp8,0,0.09512000282605489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,0,1,float16,fp8,0,0.11620266238848369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,64,0,1,fp8,fp8,0,0.10724799831708272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,float16,0,0.10147733489672343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,float16,0,0.11822400490442912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,128,1,float16,fp8,0,0.10216533144315083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,128,1,fp8,fp8,0,0.09693866968154907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,0,1,float16,fp8,0,0.11644267042477925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,64,0,1,fp8,fp8,0,0.10958932836850484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,float16,0,0.10362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,float16,0,0.11867733796437581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,128,1,float16,fp8,0,0.10179733236630757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,128,1,fp8,fp8,0,0.10178132851918538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,0,1,float16,fp8,0,0.11763733625411987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,64,0,1,fp8,fp8,0,0.11285866300264995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,float16,0,0.1021066705385844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,float16,0,0.11709866921106975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,128,1,float16,fp8,0,0.10240532954533894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,128,1,fp8,fp8,0,0.1034453312555949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,0,1,float16,fp8,0,0.11768000324567159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,64,0,1,fp8,fp8,0,0.11540800333023071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,float16,0,0.05871466795603434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,float16,0,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,128,1,float16,fp8,0,0.05835199852784475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,128,1,fp8,fp8,0,0.06215466558933258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,0,1,float16,fp8,0,0.06803733110427856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,64,0,1,fp8,fp8,0,0.07052800059318542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,float16,0,0.058378666639328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,128,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,128,1,fp8,fp8,0,0.05589333176612854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,0,1,float16,fp8,0,0.06709866722424825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,64,0,1,fp8,fp8,0,0.06233599781990051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,float16,0,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,float16,0,0.06763199965159099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,128,1,float16,fp8,0,0.059631998340288796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,128,1,fp8,fp8,0,0.056501333912213646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,0,1,float16,fp8,0,0.06831466654936473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,64,0,1,fp8,fp8,0,0.06243200103441874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,float16,0,0.060175999999046326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,float16,0,0.06877333422501881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,128,1,float16,fp8,0,0.059893334905306496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,128,1,fp8,fp8,0,0.05880533158779144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,0,1,fp8,fp8,0,0.06589333216349284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,64,0,1,float16,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,float16,0,0.058559998869895935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,float16,0,0.06829866766929626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,128,1,float16,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,128,1,fp8,fp8,0,0.05820266902446747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,0,1,float16,fp8,0,0.06835199892520905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,float16,0,0.03794133414824804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,64,0,1,fp8,fp8,0,0.06597333153088887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,float16,0,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,128,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,128,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,0,1,float16,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,64,0,1,fp8,fp8,0,0.043968002001444496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,float16,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,128,1,float16,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,128,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,64,0,1,fp8,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,float16,0,0.0444106658299764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,128,1,float16,fp8,0,0.038032000263532005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,0,1,float16,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,64,0,1,fp8,fp8,0,0.04204800228277842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,float16,0,0.03938133269548416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,float16,0,0.04599999884764353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,128,1,float16,fp8,0,0.03950933367013931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,128,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,0,1,float16,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,64,0,1,fp8,fp8,0,0.04320533573627472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,float16,0,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,float16,0,0.04394666850566864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,128,1,float16,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,128,1,fp8,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,0,1,float16,fp8,0,0.04563199977080027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,64,0,1,fp8,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,float16,0,0.030218665798505146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,128,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,0,1,float16,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,64,0,1,fp8,fp8,0,0.029194665451844532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,float16,0,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,0,1,float16,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,64,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,float16,0,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,128,1,float16,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,128,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,0,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,64,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,128,1,float16,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,128,1,fp8,fp8,0,0.02587199956178665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,0,1,float16,fp8,0,0.03134933362404505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,64,0,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,128,1,float16,fp8,0,0.025445332129796345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,128,1,fp8,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,64,0,1,fp8,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,128,1,float16,float16,0,1.3414133389790852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,0,1,float16,float16,0,1.3427786827087402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,128,1,float16,fp8,0,1.3309973080952961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,128,1,fp8,fp8,0,1.303221305211385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,0,1,fp8,fp8,0,1.2788320382436116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,64,0,1,float16,fp8,0,1.3315359751383464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,128,1,float16,float16,0,1.3524692853291829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,128,1,float16,fp8,0,1.3506879806518555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,0,1,float16,float16,0,1.3562080065409343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,128,1,fp8,fp8,0,1.2786239782969158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,0,1,float16,fp8,0,1.343781312306722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,64,0,1,fp8,fp8,0,1.271013339360555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,128,1,float16,float16,0,1.3916640281677246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,0,1,float16,float16,0,1.3959466616312664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,128,1,float16,fp8,0,1.3770559628804524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,128,1,fp8,fp8,0,1.5047732988993328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,0,1,float16,fp8,0,1.3886879285176594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,64,0,1,fp8,fp8,0,1.4860000610351562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,128,1,float16,float16,0,1.3789280255635579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,0,1,float16,float16,0,1.3778400421142578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,128,1,float16,fp8,0,1.34772793451945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,128,1,fp8,fp8,0,1.4880960782368977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,128,1,float16,float16,0,0.702618678410848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,0,1,float16,fp8,0,1.346117337544759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,0,1,float16,float16,0,0.7139039834340414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,64,0,1,fp8,fp8,0,1.483455975850423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,128,1,float16,fp8,0,0.6891146500905355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,128,1,fp8,fp8,0,0.7118879954020182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,0,1,fp8,fp8,0,0.7125066916147867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,64,0,1,float16,fp8,0,0.7009279727935791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,128,1,float16,float16,0,0.6800853411356608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,0,1,float16,float16,0,0.6783040364583334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,128,1,float16,fp8,0,0.6775893370310465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,128,1,fp8,fp8,0,0.6388266483942667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,0,1,float16,fp8,0,0.6742506821950277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,64,0,1,fp8,fp8,0,0.6321013371149699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,128,1,float16,float16,0,0.689466635386149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,128,1,float16,fp8,0,0.6817706425984701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,0,1,float16,float16,0,0.6874079704284668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,128,1,fp8,fp8,0,0.640720009803772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,0,1,float16,fp8,0,0.6849919954935709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,64,0,1,fp8,fp8,0,0.6343199809392294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,128,1,float16,float16,0,0.6979626814524332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,0,1,float16,float16,0,0.6993119716644287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,128,1,float16,fp8,0,0.6945546468098959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,128,1,fp8,fp8,0,0.7409119606018066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,0,1,float16,fp8,0,0.6963466803232828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,64,0,1,fp8,fp8,0,0.7338240146636963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,128,1,float16,float16,0,0.6822933355967203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,0,1,float16,float16,0,0.683247963587443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,128,1,float16,fp8,0,0.6773760318756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,128,1,fp8,fp8,0,0.7280800342559814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,0,1,float16,fp8,0,0.6789546807607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,64,0,1,fp8,fp8,0,0.7337226867675781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,128,1,float16,float16,0,0.3627573251724243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,0,1,float16,float16,0,0.36535998185475665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,128,1,float16,fp8,0,0.35597864786783856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,128,1,fp8,fp8,0,0.36720534165700275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,0,1,float16,fp8,0,0.36130666732788086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,64,0,1,fp8,fp8,0,0.3656853437423706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,128,1,float16,float16,0,0.34881067276000977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,0,1,float16,float16,0,0.34964267412821454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,128,1,float16,fp8,0,0.34563199679056805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,128,1,fp8,fp8,0,0.32791467507680255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,0,1,float16,fp8,0,0.34798399607340497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,64,0,1,fp8,fp8,0,0.3237226605415344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,128,1,float16,float16,0,0.3512800137201945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,0,1,float16,float16,0,0.3535199960072835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,128,1,float16,fp8,0,0.3510773181915283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,128,1,fp8,fp8,0,0.328874667485555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,0,1,float16,fp8,0,0.35126399993896484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,64,0,1,fp8,fp8,0,0.326581339041392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,128,1,float16,float16,0,0.3587679862976074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,0,1,float16,float16,0,0.3595466613769531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,128,1,float16,fp8,0,0.3566133181254069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,128,1,fp8,fp8,0,0.3587626616160075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,0,1,float16,fp8,0,0.35769601662953693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,64,0,1,fp8,fp8,0,0.357040007909139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,128,1,float16,float16,0,0.35018666585286456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,0,1,float16,float16,0,0.35314667224884033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,128,1,float16,fp8,0,0.3475253184636434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,128,1,fp8,fp8,0,0.35412267843882245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,128,1,float16,float16,0,0.1906773249308268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,0,1,float16,fp8,0,0.3493653138478597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,64,0,1,fp8,fp8,0,0.35941867033640545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,128,1,float16,fp8,0,0.18741333484649658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,0,1,float16,float16,0,0.19314666589101157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,128,1,fp8,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,0,1,float16,fp8,0,0.1897439956665039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,64,0,1,fp8,fp8,0,0.19476799170176187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,128,1,float16,float16,0,0.18179200092951456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,0,1,float16,float16,0,0.18188265959421793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,128,1,fp8,fp8,0,0.1717546582221985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,0,1,float16,fp8,0,0.18060266971588135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,128,1,float16,fp8,0,0.18232532342274985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,64,0,1,fp8,fp8,0,0.16944533586502075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,128,1,float16,float16,0,0.18522133429845175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,0,1,float16,float16,0,0.18412800629933676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,128,1,float16,fp8,0,0.18433600664138794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,128,1,fp8,fp8,0,0.17463467518488565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,0,1,float16,fp8,0,0.18367467323939005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,64,0,1,fp8,fp8,0,0.17116800944010416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,128,1,float16,float16,0,0.18805332978566489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,0,1,float16,float16,0,0.18875199556350708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,128,1,float16,fp8,0,0.18600000937779745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,128,1,fp8,fp8,0,0.18525334199269614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,0,1,float16,fp8,0,0.18796267112096152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,64,0,1,fp8,fp8,0,0.18334933121999106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,128,1,float16,float16,0,0.1839253306388855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,0,1,float16,float16,0,0.18572799364725748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,128,1,fp8,fp8,0,0.18408532937367758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,0,1,float16,fp8,0,0.18348799149195352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,128,1,float16,fp8,0,0.184063990910848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,64,0,1,fp8,fp8,0,0.18401066462198892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,128,1,float16,float16,0,0.10564266641934712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,0,1,float16,float16,0,0.10805333654085796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,128,1,float16,fp8,0,0.1051680048306783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,128,1,fp8,fp8,0,0.1077280044555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,0,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,64,0,1,fp8,fp8,0,0.10745066404342651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,128,1,float16,float16,0,0.09967999656995137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,128,1,float16,fp8,0,0.09851732850074768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,0,1,float16,float16,0,0.09891200065612793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,128,1,fp8,fp8,0,0.09332266449928284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,0,1,float16,fp8,0,0.09936533371607463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,64,0,1,fp8,fp8,0,0.09318932890892029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,128,1,float16,float16,0,0.09946133693059285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,0,1,float16,float16,0,0.10050666332244873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,128,1,float16,fp8,0,0.09956799944241841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,128,1,fp8,fp8,0,0.09489066402117412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,0,1,float16,fp8,0,0.09931199749310811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,64,0,1,fp8,fp8,0,0.09491200248400371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,128,1,float16,float16,0,0.10134933392206828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,0,1,float16,float16,0,0.1011786659558614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,128,1,fp8,fp8,0,0.10056533416112264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,128,1,float16,fp8,0,0.10131733616193135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,0,1,float16,fp8,0,0.10102400183677673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,64,0,1,fp8,fp8,0,0.09820266564687093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,128,1,float16,float16,0,0.10051199793815613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,0,1,float16,float16,0,0.10004799564679463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,128,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,128,1,fp8,fp8,0,0.10217066605885823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,0,1,float16,fp8,0,0.0999840001265208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,64,0,1,fp8,fp8,0,0.10016000270843506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,128,1,float16,float16,0,0.0580266664425532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,0,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,128,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,128,1,fp8,fp8,0,0.06117866436640421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,0,1,float16,fp8,0,0.058287998040517174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,64,0,1,fp8,fp8,0,0.06011199951171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,128,1,float16,float16,0,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,128,1,float16,fp8,0,0.05704000095526377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,0,1,float16,float16,0,0.05640000104904175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,128,1,fp8,fp8,0,0.05399466554323832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,0,1,float16,fp8,0,0.056464001536369324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,64,0,1,fp8,fp8,0,0.05439466734727224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,128,1,float16,float16,0,0.05641066531340281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,0,1,float16,float16,0,0.056517332792282104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,128,1,float16,fp8,0,0.05810666580994924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,128,1,fp8,fp8,0,0.05438933273156484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,0,1,float16,fp8,0,0.056234667698542275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,64,0,1,fp8,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,128,1,float16,float16,0,0.05807466804981232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,0,1,float16,float16,0,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,128,1,float16,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,128,1,fp8,fp8,0,0.05783466498057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,0,1,float16,fp8,0,0.05829866727193197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,64,0,1,fp8,fp8,0,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,128,1,float16,float16,0,0.057861333092053734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,0,1,float16,float16,0,0.056501333912213646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,128,1,float16,fp8,0,0.0566240002711614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,128,1,fp8,fp8,0,0.05825600028038025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,0,1,float16,fp8,0,0.055957332253456116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,128,1,float16,float16,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,64,0,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,0,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,128,1,float16,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,128,1,fp8,fp8,0,0.03800000001986822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,0,1,float16,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,64,0,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,128,1,float16,float16,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,0,1,float16,float16,0,0.03811199963092804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,128,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,128,1,fp8,fp8,0,0.0371573343873024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,64,0,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,128,1,float16,float16,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,0,1,float16,float16,0,0.038533332447210945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,128,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,128,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,0,1,float16,fp8,0,0.03879466652870178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,64,0,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,128,1,float16,float16,0,0.03843733419974645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,0,1,float16,float16,0,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,128,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,128,1,fp8,fp8,0,0.03806400050719579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,0,1,float16,fp8,0,0.038319999972979225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,64,0,1,fp8,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,128,1,float16,float16,0,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,0,1,float16,float16,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,128,1,float16,fp8,0,0.038176000118255615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,128,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,0,1,float16,fp8,0,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,64,0,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,128,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,128,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,64,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,128,1,float16,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,128,1,fp8,fp8,0,0.024330665667851765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,64,0,1,fp8,fp8,0,0.024986666937669117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,128,1,float16,float16,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,0,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,128,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,64,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,128,1,float16,float16,0,0.026234666506449383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,0,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,128,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,128,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,0,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,128,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,0,1,float16,fp8,0,0.02646933247645696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,128,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,0,1,float16,float16,0,0.022730665902296703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,128,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,64,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,128,1,float16,fp8,0,0.021514666577180225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,128,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,128,1,float16,float16,0,0.02294933299223582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,128,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,0,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,64,0,1,fp8,fp8,0,0.020741333564122517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,128,1,float16,fp8,0,0.021914665897687275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,128,1,fp8,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,0,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,128,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,64,0,1,fp8,fp8,0,0.022266666094462078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,128,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,0,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,64,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,128,1,float16,float16,0,0.6386986573537191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,0,1,float16,float16,0,0.6248960097630819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,128,1,float16,fp8,0,0.6321173508961996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,128,1,fp8,fp8,0,0.6002346674601237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,0,1,fp8,fp8,0,0.5820586681365967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,64,0,1,float16,fp8,0,0.6174666484196981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,128,1,float16,float16,0,0.6479680140813192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,0,1,float16,float16,0,0.6330453157424927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,128,1,float16,fp8,0,0.6417226791381836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,128,1,fp8,fp8,0,0.6047573486963908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,0,1,fp8,fp8,0,0.5863946676254272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,64,0,1,float16,fp8,0,0.6290613412857056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,128,1,float16,float16,0,0.6595040162404379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,0,1,float16,float16,0,0.6431413491566976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,128,1,float16,fp8,0,0.6530666748682658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,128,1,fp8,fp8,0,0.6880319913228353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,0,1,float16,fp8,0,0.6370346546173096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,64,0,1,fp8,fp8,0,0.6703840096791586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,0,1,float16,float16,0,0.6293439865112305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,128,1,float16,float16,0,0.641754666964213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,128,1,float16,fp8,0,0.6349759896596273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,128,1,fp8,fp8,0,0.6916800340016683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,128,1,float16,float16,0,0.33955732981363934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,0,1,float16,fp8,0,0.6197760105133057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,64,0,1,fp8,fp8,0,0.6749493281046549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,0,1,float16,float16,0,0.33316800991694134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,128,1,float16,fp8,0,0.3359946807225545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,128,1,fp8,fp8,0,0.3466666539510091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,0,1,float16,fp8,0,0.32731733719507855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,64,0,1,fp8,fp8,0,0.3394186496734619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,128,1,float16,float16,0,0.3272479971249898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,0,1,float16,float16,0,0.31967999537785846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,128,1,float16,fp8,0,0.3250666658083598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,128,1,fp8,fp8,0,0.30686400334040326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,0,1,float16,fp8,0,0.31700799862543744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,64,0,1,fp8,fp8,0,0.2972319920857747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,128,1,float16,float16,0,0.33082133531570435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,0,1,float16,float16,0,0.32261866331100464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,128,1,float16,fp8,0,0.32922667264938354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,128,1,fp8,fp8,0,0.3105439941088359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,0,1,float16,fp8,0,0.3222506642341614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,64,0,1,fp8,fp8,0,0.30222400029500324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,128,1,float16,float16,0,0.3366080125172933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,0,1,float16,float16,0,0.32870399951934814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,128,1,float16,fp8,0,0.33424532413482666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,128,1,fp8,fp8,0,0.33243199189503986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,0,1,float16,fp8,0,0.32741334040959674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,64,0,1,fp8,fp8,0,0.3248106638590495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,128,1,float16,float16,0,0.3286400039990743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,0,1,float16,float16,0,0.319760004679362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,128,1,float16,fp8,0,0.32660800218582153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,128,1,fp8,fp8,0,0.3331200083096822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,128,1,float16,float16,0,0.18106132745742798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,0,1,float16,fp8,0,0.3184426625569661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,64,0,1,fp8,fp8,0,0.329370657602946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,0,1,float16,float16,0,0.17642666896184286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,128,1,float16,fp8,0,0.17849600315093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,128,1,fp8,fp8,0,0.1831093430519104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,0,1,float16,fp8,0,0.17440533638000488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,64,0,1,fp8,fp8,0,0.17918399969736734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,128,1,float16,float16,0,0.171999990940094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,0,1,float16,float16,0,0.16718933979670206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,128,1,float16,fp8,0,0.17081600427627563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,128,1,fp8,fp8,0,0.16263999541600546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,0,1,float16,fp8,0,0.16646933555603027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,64,0,1,fp8,fp8,0,0.15676800409952799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,128,1,float16,float16,0,0.17391467094421387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,0,1,float16,float16,0,0.16902933518091837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,128,1,float16,fp8,0,0.17277334133783975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,128,1,fp8,fp8,0,0.16501333316167197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,0,1,float16,fp8,0,0.16887466112772623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,64,0,1,fp8,fp8,0,0.15876799821853638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,128,1,float16,float16,0,0.17698667446772257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,0,1,float16,float16,0,0.17359999815622965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,128,1,float16,fp8,0,0.17621866861979166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,128,1,fp8,fp8,0,0.17466666301091513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,0,1,float16,fp8,0,0.17157334089279175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,64,0,1,fp8,fp8,0,0.1689173380533854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,128,1,float16,float16,0,0.17320533593495688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,0,1,float16,float16,0,0.17086400588353476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,128,1,float16,fp8,0,0.17297067244847616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,128,1,fp8,fp8,0,0.1734559933344523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,0,1,float16,fp8,0,0.16870933771133423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,64,0,1,fp8,fp8,0,0.16910399993260702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,128,1,float16,float16,0,0.10123200217882793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,0,1,float16,float16,0,0.0992693305015564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,128,1,float16,fp8,0,0.09983467062314351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,128,1,fp8,fp8,0,0.10250666737556458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,0,1,float16,fp8,0,0.09719467163085938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,64,0,1,fp8,fp8,0,0.09975999593734741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,128,1,float16,float16,0,0.09192533294359843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,0,1,float16,float16,0,0.08897067109743755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,128,1,float16,fp8,0,0.09089066584904988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,128,1,fp8,fp8,0,0.08730666836102803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,0,1,float16,fp8,0,0.08923199772834778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,64,0,1,fp8,fp8,0,0.08396266897519429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,128,1,float16,float16,0,0.09289600451787312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,0,1,float16,float16,0,0.09264533718427022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,128,1,float16,fp8,0,0.09293867150942485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,128,1,fp8,fp8,0,0.08891733487447102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,0,1,float16,fp8,0,0.09097599983215332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,64,0,1,fp8,fp8,0,0.08694400389989217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,128,1,float16,float16,0,0.09518933296203613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,0,1,float16,float16,0,0.09319466352462769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,128,1,float16,fp8,0,0.09499200185139973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,0,1,float16,fp8,0,0.09258133172988892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,128,1,fp8,fp8,0,0.09453866879145305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,64,0,1,fp8,fp8,0,0.09078400333722432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,128,1,float16,float16,0,0.09338133533795674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,0,1,float16,float16,0,0.09083200494448344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,128,1,float16,fp8,0,0.09316266576449077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,128,1,fp8,fp8,0,0.0956053336461385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,0,1,float16,fp8,0,0.09105599919954936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,64,0,1,fp8,fp8,0,0.0916373332341512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,128,1,float16,float16,0,0.055717334151268005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,0,1,float16,float16,0,0.05431999762852987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,128,1,float16,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,0,1,float16,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,128,1,fp8,fp8,0,0.058245331048965454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,64,0,1,fp8,fp8,0,0.05619200070699056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,128,1,float16,float16,0,0.05399466554323832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,0,1,float16,float16,0,0.05209066470464071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,128,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,128,1,fp8,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,64,0,1,fp8,fp8,0,0.05021866659323374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,128,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,0,1,float16,float16,0,0.052005335688591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,128,1,fp8,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,128,1,float16,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,0,1,float16,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,64,0,1,fp8,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,128,1,float16,float16,0,0.055813332398732506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,0,1,float16,float16,0,0.05369600156943003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,128,1,float16,fp8,0,0.05426133175690969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,128,1,fp8,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,0,1,float16,fp8,0,0.05310933291912079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,64,0,1,fp8,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,128,1,float16,float16,0,0.053930665055910744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,0,1,float16,float16,0,0.0528053343296051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,128,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,128,1,fp8,fp8,0,0.054485330979029335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,0,1,float16,fp8,0,0.052501335740089417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,64,0,1,fp8,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,128,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,0,1,float16,float16,0,0.03533866753180822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,128,1,fp8,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,0,1,float16,fp8,0,0.03522666543722153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,64,0,1,fp8,fp8,0,0.035386666655540466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,128,1,float16,float16,0,0.03425599883000056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,128,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,128,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,0,1,float16,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,64,0,1,fp8,fp8,0,0.03268799930810928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,128,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,128,1,float16,fp8,0,0.03504000107447306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,128,1,fp8,fp8,0,0.035455999275048576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,0,1,float16,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,64,0,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,128,1,float16,float16,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,0,1,float16,float16,0,0.03534399966398875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,128,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,0,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,128,1,fp8,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,64,0,1,fp8,fp8,0,0.035045333206653595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,128,1,float16,float16,0,0.035274667044480644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,0,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,128,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,128,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,0,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,64,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,128,1,float16,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,128,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,128,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,64,0,1,fp8,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,128,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,64,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,128,1,float16,float16,0,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,128,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,0,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,128,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,128,1,fp8,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,64,0,1,fp8,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,128,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,128,1,float16,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,128,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,0,1,float16,fp8,0,0.023765332996845245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,64,0,1,fp8,fp8,0,0.02447466552257538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,0,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,128,1,fp8,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,128,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,128,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,128,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,64,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,128,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,0,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,64,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,128,1,float16,float16,0,0.01802666609485944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,128,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,64,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,128,1,fp8,fp8,0,0.017871999492247898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,128,1,float16,float16,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,0,1,fp8,fp8,0,0.017893332988023758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,128,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,128,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,64,0,1,float16,fp8,0,0.018053332964579265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,0,1,float16,float16,0,0.020645332833131153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,0,1,float16,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,64,0,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,128,1,float16,float16,0,0.3370720148086548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,128,1,float16,fp8,0,0.33270400762557983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,0,1,float16,float16,0,0.3367893298467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,128,1,fp8,fp8,0,0.32526934146881104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,0,1,float16,fp8,0,0.3340533177057902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,64,0,1,fp8,fp8,0,0.32468799750010174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,128,1,float16,float16,0,0.3428853352864583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,0,1,float16,float16,0,0.3412799835205078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,128,1,float16,fp8,0,0.3389013210932414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,128,1,fp8,fp8,0,0.31432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,0,1,fp8,fp8,0,0.314736008644104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,64,0,1,float16,fp8,0,0.33909865220387775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,128,1,float16,float16,0,0.3502560059229533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,0,1,float16,float16,0,0.3492853244145711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,128,1,float16,fp8,0,0.3458453416824341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,128,1,fp8,fp8,0,0.3422773281733195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,0,1,float16,fp8,0,0.34562134742736816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,64,0,1,fp8,fp8,0,0.34327999750773114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,128,1,float16,float16,0,0.3407573302586873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,0,1,float16,float16,0,0.33959468205769855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,128,1,float16,fp8,0,0.33658134937286377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,0,1,float16,fp8,0,0.3352533181508382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,128,1,fp8,fp8,0,0.34831468264261883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,64,0,1,fp8,fp8,0,0.34700266520182294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,128,1,float16,float16,0,0.18477332592010498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,128,1,float16,fp8,0,0.18239466349283853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,0,1,float16,float16,0,0.183514674504598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,128,1,fp8,fp8,0,0.18625066677729288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,0,1,float16,fp8,0,0.18197333812713623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,64,0,1,fp8,fp8,0,0.1848586599032084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,128,1,float16,float16,0,0.17428267002105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,0,1,float16,float16,0,0.17477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,128,1,float16,fp8,0,0.1731520096460978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,128,1,fp8,fp8,0,0.1678826610247294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,0,1,float16,fp8,0,0.17303466796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,64,0,1,fp8,fp8,0,0.16706132888793945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,0,1,float16,float16,0,0.17685866355895996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,128,1,float16,float16,0,0.17710934082667032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,128,1,float16,fp8,0,0.17733333508173624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,128,1,fp8,fp8,0,0.16527466972668967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,0,1,float16,fp8,0,0.1767786741256714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,64,0,1,fp8,fp8,0,0.16583466529846191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,128,1,float16,float16,0,0.18247999747594199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,0,1,float16,float16,0,0.18198400735855103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,128,1,float16,fp8,0,0.17977599302927652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,128,1,fp8,fp8,0,0.17747199535369873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,0,1,float16,fp8,0,0.1797013282775879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,64,0,1,fp8,fp8,0,0.1755626598993937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,128,1,float16,float16,0,0.17822933197021484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,0,1,float16,float16,0,0.17718400557835898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,128,1,float16,fp8,0,0.17648533980051676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,0,1,float16,fp8,0,0.17562133073806763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,128,1,fp8,fp8,0,0.17940266927083334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,64,0,1,fp8,fp8,0,0.17890665928522745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,128,1,float16,float16,0,0.10256533821423848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,0,1,float16,float16,0,0.1014400025208791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,128,1,float16,fp8,0,0.10142399867375691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,0,1,float16,fp8,0,0.10046933094660442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,128,1,fp8,fp8,0,0.10402666529019673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,64,0,1,fp8,fp8,0,0.10321066776911418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,128,1,float16,float16,0,0.09507733583450317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,0,1,float16,float16,0,0.09526399771372478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,128,1,float16,fp8,0,0.09326400359471639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,128,1,fp8,fp8,0,0.08962133526802063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,0,1,float16,fp8,0,0.093231995900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,64,0,1,fp8,fp8,0,0.08996267120043437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,128,1,float16,float16,0,0.09724266330401103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,0,1,float16,float16,0,0.09544533491134644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,128,1,float16,fp8,0,0.09505066275596619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,128,1,fp8,fp8,0,0.08990400036176045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,0,1,float16,fp8,0,0.09506666660308838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,64,0,1,fp8,fp8,0,0.09020800391832988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,0,1,float16,float16,0,0.09717333316802979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,128,1,float16,float16,0,0.09859200318654378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,128,1,float16,fp8,0,0.09745599826176961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,0,1,float16,fp8,0,0.09708266456921895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,128,1,fp8,fp8,0,0.09496532877286275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,64,0,1,fp8,fp8,0,0.09589866797129314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,128,1,float16,float16,0,0.09687466422716777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,0,1,float16,float16,0,0.09690133730570476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,128,1,float16,fp8,0,0.09541333715120952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,128,1,fp8,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,128,1,float16,float16,0,0.054485330979029335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,0,1,float16,fp8,0,0.09691733121871948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,64,0,1,fp8,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,0,1,float16,float16,0,0.05377600093682607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,128,1,float16,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,128,1,fp8,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,0,1,float16,fp8,0,0.05406400064627329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,64,0,1,fp8,fp8,0,0.05694933235645294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,128,1,float16,float16,0,0.05390933156013489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,0,1,float16,float16,0,0.054117331902186074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,128,1,float16,fp8,0,0.05213866631189982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,128,1,fp8,fp8,0,0.05217599868774414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,0,1,fp8,fp8,0,0.050885334610939026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,64,0,1,float16,fp8,0,0.053861334919929504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,128,1,float16,float16,0,0.0539680023988088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,0,1,float16,float16,0,0.053823997577031456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,128,1,float16,fp8,0,0.05388799806435903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,0,1,float16,fp8,0,0.05402666827042898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,128,1,fp8,fp8,0,0.05248000224431356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,64,0,1,fp8,fp8,0,0.05202133456865946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,128,1,float16,float16,0,0.05434666574001312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,0,1,float16,float16,0,0.056101332108179726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,128,1,float16,fp8,0,0.0543039987484614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,128,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,0,1,float16,fp8,0,0.0539626677831014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,64,0,1,fp8,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,128,1,float16,float16,0,0.054229333996772766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,0,1,float16,float16,0,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,128,1,float16,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,128,1,fp8,fp8,0,0.053871999184290566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,0,1,float16,fp8,0,0.05237866441408793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,64,0,1,fp8,fp8,0,0.05388799806435903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,128,1,float16,float16,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,0,1,float16,float16,0,0.03714133302370707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,128,1,float16,fp8,0,0.03573333223660787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,128,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,64,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,128,1,float16,float16,0,0.035930665830771126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,0,1,float16,float16,0,0.035386666655540466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,128,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,128,1,fp8,fp8,0,0.03534399966398875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,0,1,float16,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,64,0,1,fp8,fp8,0,0.033973333736260734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,128,1,float16,float16,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,0,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,128,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,128,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,0,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,64,0,1,fp8,fp8,0,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,128,1,float16,float16,0,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,0,1,float16,float16,0,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,128,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,128,1,fp8,fp8,0,0.03654933224121729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,0,1,float16,fp8,0,0.03771200031042099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,64,0,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,128,1,float16,float16,0,0.036090667049090065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,0,1,float16,float16,0,0.03586666782697042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,128,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,128,1,fp8,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,0,1,float16,fp8,0,0.036015999813874565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,64,0,1,fp8,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,128,1,float16,float16,0,0.02363733450571696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,128,1,float16,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,128,1,fp8,fp8,0,0.024090667565663654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,0,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,64,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,128,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,0,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,128,1,float16,fp8,0,0.02499733368555705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,64,0,1,fp8,fp8,0,0.024341332415739696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,128,1,float16,float16,0,0.023898666103680927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,128,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,128,1,fp8,fp8,0,0.024832000335057575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,0,1,float16,fp8,0,0.02510933329661687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,128,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,64,0,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,128,1,float16,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,64,0,1,fp8,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,0,1,float16,float16,0,0.01836266616980235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,0,1,float16,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,128,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,0,1,float16,fp8,0,0.018768000106016796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,64,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,128,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,64,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,128,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,0,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,128,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,64,0,1,fp8,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,128,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,128,1,fp8,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,0,1,float16,fp8,0,0.016261332978804905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,128,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,128,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,64,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,128,1,fp8,fp8,0,0.015861333658297855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,128,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,0,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,128,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,128,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,128,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,128,1,fp8,fp8,0,0.01651200031240781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,128,1,float16,float16,0,0.2339093287785848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,0,1,float16,float16,0,0.23254932959874472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,128,1,float16,fp8,0,0.23089599609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,128,1,fp8,fp8,0,0.2217493255933126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,0,1,float16,fp8,0,0.23054933547973633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,64,0,1,fp8,fp8,0,0.22216000159581503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,128,1,float16,float16,0,0.23449599742889404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,0,1,float16,float16,0,0.23442665735880533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,128,1,float16,fp8,0,0.23229867219924927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,128,1,fp8,fp8,0,0.21661865711212158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,0,1,float16,fp8,0,0.23274133602778116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,64,0,1,fp8,fp8,0,0.21596266825993857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,128,1,float16,float16,0,0.23808000485102335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,0,1,float16,float16,0,0.23702400922775269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,128,1,float16,fp8,0,0.23319466908772787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,128,1,fp8,fp8,0,0.2291626731554667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,0,1,fp8,fp8,0,0.2283626596132914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,64,0,1,float16,fp8,0,0.2339466611544291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,128,1,float16,float16,0,0.23406400283177695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,0,1,float16,float16,0,0.23290133476257324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,128,1,float16,fp8,0,0.2323466738065084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,128,1,float16,float16,0,0.13004266222318014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,128,1,fp8,fp8,0,0.23016534248987833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,0,1,float16,fp8,0,0.23447465896606445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,64,0,1,fp8,fp8,0,0.2294506629308065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,0,1,float16,float16,0,0.1300320029258728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,128,1,float16,fp8,0,0.12808533509572348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,128,1,fp8,fp8,0,0.12801067034403482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,0,1,float16,fp8,0,0.12914666533470154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,64,0,1,fp8,fp8,0,0.12852266430854797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,128,1,float16,float16,0,0.12218133608500163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,0,1,float16,float16,0,0.12157332897186279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,128,1,float16,fp8,0,0.12174399693806966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,128,1,fp8,fp8,0,0.11565333604812622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,0,1,float16,fp8,0,0.12191466490427653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,64,0,1,fp8,fp8,0,0.11559466520945232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,128,1,float16,float16,0,0.12249066432317098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,128,1,float16,fp8,0,0.12178666392962138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,0,1,float16,float16,0,0.12362666924794515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,128,1,fp8,fp8,0,0.11559999982515971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,0,1,float16,fp8,0,0.12277866403261821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,64,0,1,fp8,fp8,0,0.11611732840538025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,128,1,float16,float16,0,0.12586667140324911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,0,1,float16,float16,0,0.12504000465075174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,128,1,float16,fp8,0,0.1236853301525116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,128,1,fp8,fp8,0,0.12174933155377705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,0,1,float16,fp8,0,0.12426132957140605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,64,0,1,fp8,fp8,0,0.11942933003107707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,128,1,float16,float16,0,0.12401066223780315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,0,1,float16,float16,0,0.12379200259844463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,128,1,float16,fp8,0,0.12542399764060974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,128,1,fp8,fp8,0,0.12246400117874146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,0,1,float16,fp8,0,0.12379200259844463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,128,1,float16,float16,0,0.07036266724268596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,0,1,float16,float16,0,0.07017066578070323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,64,0,1,fp8,fp8,0,0.12372799714406331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,128,1,float16,fp8,0,0.07025599976380666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,0,1,float16,fp8,0,0.07084799806276958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,128,1,fp8,fp8,0,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,64,0,1,fp8,fp8,0,0.07036800185839336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,128,1,float16,float16,0,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,128,1,float16,fp8,0,0.06703466673692067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,128,1,fp8,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,0,1,float16,float16,0,0.06857599814732869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,0,1,float16,fp8,0,0.06816533207893372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,64,0,1,fp8,fp8,0,0.06450133522351582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,128,1,float16,float16,0,0.06832000116507213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,0,1,float16,float16,0,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,128,1,float16,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,128,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,0,1,float16,fp8,0,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,64,0,1,fp8,fp8,0,0.06569066643714905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,128,1,float16,float16,0,0.07030400137106578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,0,1,float16,float16,0,0.07019199927647908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,128,1,float16,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,128,1,fp8,fp8,0,0.06714666883150737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,0,1,float16,fp8,0,0.06828799843788147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,64,0,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,128,1,float16,float16,0,0.06836266815662384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,0,1,float16,float16,0,0.0682773341735204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,128,1,float16,fp8,0,0.06820799907048543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,128,1,fp8,fp8,0,0.06649599969387054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,0,1,float16,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,64,0,1,fp8,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,128,1,float16,float16,0,0.04178133110205332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,0,1,float16,float16,0,0.043290664752324425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,128,1,float16,fp8,0,0.0440533310174942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,128,1,fp8,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,0,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,64,0,1,fp8,fp8,0,0.04358933369318644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,128,1,float16,float16,0,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,0,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,128,1,float16,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,128,1,fp8,fp8,0,0.04004266609748205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,64,0,1,fp8,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,128,1,float16,float16,0,0.04229333500067393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,0,1,float16,float16,0,0.04182933270931244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,128,1,float16,fp8,0,0.04247466723124186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,128,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,0,1,float16,fp8,0,0.04228800038496653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,64,0,1,fp8,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,128,1,float16,float16,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,0,1,float16,float16,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,128,1,float16,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,128,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,0,1,float16,fp8,0,0.043765331308046974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,64,0,1,fp8,fp8,0,0.0421066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,128,1,float16,float16,0,0.041536000867684685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,0,1,float16,float16,0,0.04225599765777588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,128,1,float16,fp8,0,0.04171200096607208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,128,1,fp8,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,0,1,float16,fp8,0,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,64,0,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,128,1,float16,float16,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,0,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,128,1,float16,fp8,0,0.029535998900731403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,128,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,0,1,float16,fp8,0,0.027850667635599773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,64,0,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,128,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,0,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,128,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,0,1,float16,fp8,0,0.029071999092896778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,64,0,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,128,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,128,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,128,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,64,0,1,fp8,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,128,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,128,1,fp8,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,0,1,float16,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,64,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,128,1,float16,float16,0,0.027210667729377747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,128,1,float16,fp8,0,0.029834667841593426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,64,0,1,fp8,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,128,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,0,1,float16,float16,0,0.020842666427294414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,128,1,float16,fp8,0,0.020725333442290623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,64,0,1,fp8,fp8,0,0.020367999871571858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,128,1,float16,float16,0,0.020869334538777668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,0,1,float16,float16,0,0.02176533391078313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,0,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,128,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,0,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,64,0,1,fp8,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,128,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,0,1,float16,float16,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,128,1,fp8,fp8,0,0.020784000555674236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,0,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,128,1,float16,fp8,0,0.020058666666348774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,64,0,1,fp8,fp8,0,0.020703999946514767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,0,1,float16,float16,0,0.019845332950353622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,128,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,0,1,float16,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,128,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,64,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,128,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,128,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,64,0,1,fp8,fp8,0,0.016506666938463848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,128,1,float16,float16,0,0.016117333124081295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,128,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,128,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,128,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,0,1,float16,fp8,0,0.016389333953460056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,128,1,float16,float16,0,0.014730667074521383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,128,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,64,0,1,fp8,fp8,0,0.016538667182127636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,128,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,64,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,128,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,64,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,128,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,0,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,64,0,1,fp8,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,128,1,float16,float16,0,0.1803200046221415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,0,1,float16,float16,0,0.18125865856806436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,128,1,float16,fp8,0,0.17906665802001953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,0,1,float16,fp8,0,0.17987199624379477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,128,1,fp8,fp8,0,0.16946667432785034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,64,0,1,fp8,fp8,0,0.16778665781021118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,128,1,float16,float16,0,0.18147732814153036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,0,1,float16,float16,0,0.17958933115005493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,128,1,float16,fp8,0,0.17882666985193887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,128,1,fp8,fp8,0,0.16918933391571045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,0,1,fp8,fp8,0,0.16888533035914102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,64,0,1,float16,fp8,0,0.1797013282775879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,128,1,float16,float16,0,0.18263999621073404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,0,1,float16,float16,0,0.18229333559672037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,128,1,fp8,fp8,0,0.17490132649739584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,128,1,float16,fp8,0,0.18345600366592407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,0,1,float16,fp8,0,0.1818079948425293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,64,0,1,fp8,fp8,0,0.17478932936986288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,128,1,float16,float16,0,0.18127467234929404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,0,1,float16,float16,0,0.1825066606203715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,128,1,float16,fp8,0,0.18129066626230875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,128,1,fp8,fp8,0,0.1761066714922587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,0,1,float16,fp8,0,0.18137067556381226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,128,1,float16,float16,0,0.09787733356157939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,64,0,1,fp8,fp8,0,0.17546667655309042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,0,1,float16,float16,0,0.09691733121871948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,128,1,fp8,fp8,0,0.09693333506584167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,128,1,float16,fp8,0,0.09755200147628784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,0,1,float16,fp8,0,0.09716266393661499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,64,0,1,fp8,fp8,0,0.09687466422716777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,128,1,float16,float16,0,0.09541866183280945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,0,1,float16,float16,0,0.0950986643632253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,128,1,float16,fp8,0,0.09643733501434326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,128,1,fp8,fp8,0,0.09098133444786072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,0,1,float16,fp8,0,0.09530133008956909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,64,0,1,fp8,fp8,0,0.09111467003822327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,128,1,float16,float16,0,0.0974026620388031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,0,1,float16,float16,0,0.09682133793830872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,128,1,float16,fp8,0,0.09706667065620422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,128,1,fp8,fp8,0,0.09047466516494751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,0,1,float16,fp8,0,0.09541333715120952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,64,0,1,fp8,fp8,0,0.09087466200192769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,128,1,float16,float16,0,0.09728533029556274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,128,1,float16,fp8,0,0.09715200463930766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,0,1,float16,float16,0,0.09742400050163269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,128,1,fp8,fp8,0,0.09297600388526917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,0,1,float16,fp8,0,0.09698667128880818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,64,0,1,fp8,fp8,0,0.09314666191736858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,0,1,float16,float16,0,0.0958720048268636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,128,1,float16,float16,0,0.09596799810727437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,128,1,float16,fp8,0,0.09671466549237569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,128,1,fp8,fp8,0,0.0928000013033549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,128,1,float16,float16,0,0.05675200124581655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,0,1,float16,fp8,0,0.09548266728719075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,64,0,1,fp8,fp8,0,0.093231995900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,0,1,float16,float16,0,0.05778666834036509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,128,1,float16,fp8,0,0.05691733459631602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,128,1,fp8,fp8,0,0.055776000022888184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,0,1,float16,fp8,0,0.05779199798901876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,64,0,1,fp8,fp8,0,0.055914665261904396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,128,1,float16,float16,0,0.05598933498064677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,0,1,float16,float16,0,0.05644799768924713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,128,1,float16,fp8,0,0.05606399973233541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,128,1,fp8,fp8,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,0,1,float16,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,64,0,1,fp8,fp8,0,0.054373333851496376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,128,1,float16,float16,0,0.056559999783833824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,0,1,float16,float16,0,0.05592533449331919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,128,1,float16,fp8,0,0.05596266686916351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,128,1,fp8,fp8,0,0.05382933219273885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,0,1,float16,fp8,0,0.05593599875768026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,64,0,1,fp8,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,128,1,float16,float16,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,0,1,float16,float16,0,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,128,1,float16,fp8,0,0.056421334544817604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,128,1,fp8,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,0,1,float16,fp8,0,0.05645333230495453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,64,0,1,fp8,fp8,0,0.056405335664749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,128,1,float16,float16,0,0.05637866755326589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,0,1,float16,float16,0,0.05690666536490122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,128,1,fp8,fp8,0,0.05619200070699056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,128,1,float16,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,0,1,float16,fp8,0,0.056101332108179726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,64,0,1,fp8,fp8,0,0.05585599939028422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,128,1,float16,float16,0,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,0,1,float16,float16,0,0.03382399926582972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,128,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,128,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,128,1,float16,float16,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,64,0,1,fp8,fp8,0,0.03369600077470144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,0,1,float16,float16,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,128,1,float16,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,128,1,fp8,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,0,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,64,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,128,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,0,1,float16,float16,0,0.034389334420363106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,128,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,128,1,fp8,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,0,1,float16,fp8,0,0.034272000193595886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,64,0,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,128,1,float16,float16,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,0,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,128,1,float16,fp8,0,0.0359199990828832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,128,1,fp8,fp8,0,0.0351200004418691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,0,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,64,0,1,fp8,fp8,0,0.03349866718053818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,128,1,float16,float16,0,0.03566399961709976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,0,1,float16,float16,0,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,128,1,float16,fp8,0,0.03452266752719879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,128,1,fp8,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,0,1,float16,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,64,0,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,128,1,float16,float16,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,128,1,float16,fp8,0,0.02366400013367335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,128,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,0,1,float16,fp8,0,0.024682665864626568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,64,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,128,1,fp8,fp8,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,64,0,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,128,1,float16,float16,0,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,128,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,0,1,float16,fp8,0,0.02383466561635335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,64,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,128,1,float16,float16,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,0,1,float16,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,64,0,1,fp8,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,128,1,fp8,fp8,0,0.024906667570273083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,0,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,64,0,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,128,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,64,0,1,fp8,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,128,1,fp8,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,128,1,float16,float16,0,0.020725333442290623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,128,1,float16,float16,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,64,0,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,0,1,float16,float16,0,0.018592000007629395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,128,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,128,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,0,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,64,0,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,128,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,128,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,128,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,0,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,128,1,float16,fp8,0,0.01639466608564059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,128,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,128,1,float16,float16,0,0.016352000335852306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,128,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,128,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,64,0,1,fp8,fp8,0,0.016682667036851246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,0,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,128,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,128,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,128,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,0,1,float16,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,128,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,128,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,0,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,0,1,float16,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,64,0,1,fp8,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,128,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,0,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,64,0,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,128,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,128,1,float16,float16,0,0.15641599893569946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,0,1,float16,float16,0,0.15678933262825012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,128,1,float16,fp8,0,0.15636799732844034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,128,1,fp8,fp8,0,0.14452800154685974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,0,1,float16,fp8,0,0.15664533774058023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,64,0,1,fp8,fp8,0,0.1455359955628713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,128,1,float16,float16,0,0.15863466262817383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,128,1,float16,fp8,0,0.15651733676592508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,0,1,float16,float16,0,0.15677332878112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,128,1,fp8,fp8,0,0.1465226709842682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,0,1,float16,fp8,0,0.1566986640294393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,64,0,1,fp8,fp8,0,0.14602667093276978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,128,1,float16,float16,0,0.1586079994837443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,0,1,float16,float16,0,0.1583146651585897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,128,1,float16,fp8,0,0.15653866529464722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,128,1,fp8,fp8,0,0.14838932951291403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,0,1,float16,fp8,0,0.15664000312487283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,64,0,1,fp8,fp8,0,0.14641066392262778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,0,1,float16,float16,0,0.15680000185966492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,128,1,float16,fp8,0,0.15657066305478415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,128,1,fp8,fp8,0,0.14739200472831726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,128,1,float16,float16,0,0.15762133399645487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,128,1,float16,float16,0,0.08674666285514832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,0,1,float16,fp8,0,0.15646933515866598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,64,0,1,fp8,fp8,0,0.14834133783976236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,0,1,float16,float16,0,0.08745066324869792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,128,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,0,1,float16,fp8,0,0.0867039958635966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,128,1,fp8,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,128,1,float16,float16,0,0.0867146650950114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,64,0,1,fp8,fp8,0,0.08359466989835103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,0,1,float16,float16,0,0.08661866188049316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,128,1,float16,fp8,0,0.0849226713180542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,128,1,fp8,fp8,0,0.07930666704972585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,0,1,float16,fp8,0,0.08687466382980347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,64,0,1,fp8,fp8,0,0.08078399797280629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,0,1,float16,float16,0,0.08481599887212117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,128,1,float16,float16,0,0.0849120020866394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,128,1,float16,fp8,0,0.0867146650950114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,128,1,fp8,fp8,0,0.0804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,0,1,fp8,fp8,0,0.08067733546098073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,64,0,1,float16,fp8,0,0.08547733227411906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,128,1,float16,float16,0,0.08666132887204488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,0,1,float16,float16,0,0.08709866801897685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,128,1,float16,fp8,0,0.0849173367023468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,128,1,fp8,fp8,0,0.08259200056393941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,0,1,float16,fp8,0,0.08681600292523702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,64,0,1,fp8,fp8,0,0.08283733328183492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,128,1,float16,float16,0,0.0865760048230489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,0,1,float16,float16,0,0.0867680013179779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,128,1,float16,fp8,0,0.08693866928418477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,128,1,fp8,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,0,1,float16,fp8,0,0.08689066767692566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,64,0,1,fp8,fp8,0,0.08083733419577281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,128,1,float16,float16,0,0.04971200227737427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,0,1,float16,float16,0,0.05182399849096934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,128,1,float16,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,128,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,0,1,float16,fp8,0,0.05029866596062978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,64,0,1,fp8,fp8,0,0.0481333335240682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,128,1,float16,float16,0,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,0,1,float16,float16,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,128,1,float16,fp8,0,0.05017066498597463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,128,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,0,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,64,0,1,fp8,fp8,0,0.04781333108743032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,128,1,float16,float16,0,0.049728001157442726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,0,1,float16,float16,0,0.04967466493447622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,128,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,128,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,0,1,float16,fp8,0,0.05008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,64,0,1,fp8,fp8,0,0.046384001771608986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,128,1,float16,float16,0,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,0,1,float16,float16,0,0.05095999936262766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,128,1,float16,fp8,0,0.05030400057633718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,128,1,fp8,fp8,0,0.0476800004641215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,0,1,float16,fp8,0,0.05167999863624573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,64,0,1,fp8,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,128,1,float16,float16,0,0.04982399940490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,0,1,float16,float16,0,0.05109333495299021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,128,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,128,1,fp8,fp8,0,0.04791999856630961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,0,1,float16,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,64,0,1,fp8,fp8,0,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,128,1,float16,float16,0,0.031141333281993866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,128,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,128,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,64,0,1,fp8,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,128,1,float16,float16,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,0,1,float16,float16,0,0.031194667021433514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,128,1,fp8,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,64,0,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,128,1,float16,float16,0,0.03155199935038885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,128,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,128,1,fp8,fp8,0,0.03001066545645396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,0,1,float16,fp8,0,0.031231999397277832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,64,0,1,fp8,fp8,0,0.0310506671667099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,128,1,float16,float16,0,0.03126933425664902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,0,1,float16,float16,0,0.031221332649389904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,128,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,128,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,0,1,float16,fp8,0,0.03169066707293192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,64,0,1,fp8,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,128,1,float16,float16,0,0.03130666663249334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,128,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,0,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,64,0,1,fp8,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,128,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,0,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,128,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,128,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,64,0,1,fp8,fp8,0,0.021962667504946392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,128,1,float16,float16,0,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,0,1,float16,float16,0,0.022815999885400135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,128,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,64,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,128,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,64,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,0,1,float16,float16,0,0.022944000860055287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,128,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,128,1,float16,float16,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,128,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,128,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,0,1,float16,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,128,1,float16,float16,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,128,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,128,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,64,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,128,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,128,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,128,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,64,0,1,fp8,fp8,0,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,128,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,128,1,float16,float16,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,0,1,float16,float16,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,128,1,float16,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,128,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,128,1,float16,float16,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,64,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,0,1,float16,fp8,0,0.016469333320856094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,0,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,128,1,float16,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,128,1,fp8,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,0,1,float16,float16,0,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,0,1,float16,float16,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,128,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,0,1,float16,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,0,0.1344000001748403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,0,0.1341600020726522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,0,0.13674666484196982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,128,1,fp8,fp8,0,0.12345066666603088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,0,0.13412266969680786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,64,0,1,fp8,fp8,0,0.12411200006802876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,0,0.13593066732088724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,0,0.1354986627896627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,0,0.13633066415786743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,128,1,fp8,fp8,0,0.1237440009911855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,0,0.13606933752695718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,64,0,1,fp8,fp8,0,0.12390399972597758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,0,0.1343946655591329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,0,0.1341333289941152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,0,0.13637866576512656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,128,1,fp8,fp8,0,0.12293866276741028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,0,0.1341866652170817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,64,0,1,fp8,fp8,0,0.12388799587885539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,0,0.13402666648228964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,0,0.1342026690642039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,0,0.1362826625506083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,128,1,fp8,fp8,0,0.1221386690934499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,0,0.1340000033378601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,128,1,float16,float16,0,0.07482666770617168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,64,0,1,fp8,fp8,0,0.12380799651145935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,0,0.07469333211580913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,128,1,float16,fp8,0,0.07436800003051758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,128,1,fp8,fp8,0,0.0685280015071233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,0,0.07493333518505096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,64,0,1,fp8,fp8,0,0.0686773310105006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,0,0.07459733386834462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,0,0.07285866638024648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,0,0.0755573312441508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,128,1,fp8,fp8,0,0.06856533388296764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,0,0.07460266848405202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,64,0,1,fp8,fp8,0,0.06739733119805653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,0,0.07412800192832947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,0,0.0747519979874293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,128,1,fp8,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,0,0.07512533167997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,0,0.07419733206431071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,64,0,1,fp8,fp8,0,0.0689333329598109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,0,0.07464533547560374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,0,0.0751039981842041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,0,0.07415999968846639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,128,1,fp8,fp8,0,0.06863466898600261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,0,0.07457066575686137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,64,0,1,fp8,fp8,0,0.06846933563550313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,0,0.0740479975938797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,0,0.07455466687679291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,0,0.07461866736412048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,0,0.07468800246715546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,128,1,fp8,fp8,0,0.06863999863465627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,128,1,float16,float16,0,0.04394133388996124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,64,0,1,fp8,fp8,0,0.06799466907978058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,0,0.04465066889921824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,128,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,128,1,float16,fp8,0,0.04473066826661428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,64,0,1,fp8,fp8,0,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,0,0.04359466830889384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,0,0.04436799883842468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,0,0.04377066592375437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,64,0,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,0,0.04506133496761322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,128,1,fp8,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,64,0,1,fp8,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,0,0.0454773356517156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,0,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,128,1,fp8,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,0,0.04418133199214935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,128,1,fp8,fp8,0,0.0414986660083135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,0,0.043978666265805565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,128,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,0,0.028581333657105763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,128,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,128,1,fp8,fp8,0,0.028346667687098186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,128,1,fp8,fp8,0,0.027834666272004444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,64,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,128,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,64,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,0,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,128,1,fp8,fp8,0,0.029167999823888142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,0,0.03029866764942805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,64,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,0,0.029157333076000214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,0,0.031034665803114574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,128,1,float16,float16,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,128,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,128,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,64,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,128,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,0,0.022111999491850536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,64,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,64,0,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,64,0,1,fp8,fp8,0,0.02293866624434789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,128,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,64,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,64,0,1,fp8,fp8,0,0.01800000046690305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,0,0.018085333208243053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,128,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,64,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,128,1,fp8,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,64,0,1,fp8,fp8,0,0.01802666609485944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,128,1,fp8,fp8,0,0.017877332866191864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,64,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,128,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,64,0,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,128,1,fp8,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,0,0.016042667130629223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,0,0.016154666741689045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,128,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,0,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,float16,0,1.2451999982198079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,128,1,float16,fp8,0,1.2537866433461506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,128,1,fp8,fp8,0,1.1382933457692463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,float16,0,6.434485117594401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,0,1,float16,fp8,0,6.454074859619141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,float16,0,1.2644373575846355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,64,0,1,fp8,fp8,0,5.539295832316081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,128,1,float16,fp8,0,1.2752319971720378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,128,1,fp8,fp8,0,1.1583253542582195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,float16,0,1.2695199648539226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,float16,0,6.466250737508138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,0,1,fp8,fp8,0,5.563285191853841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,64,0,1,float16,fp8,0,6.480096181233724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,128,1,float16,fp8,0,1.2857920328776042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,128,1,fp8,fp8,0,1.1742560068766277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,float16,0,6.486213048299153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,float16,0,1.3063626289367676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,128,1,float16,fp8,0,1.3213866551717122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,0,1,fp8,fp8,0,5.585573196411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,128,1,fp8,fp8,0,1.2140906651814778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,64,0,1,float16,fp8,0,6.4999574025472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,float16,0,0.7390826543172201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,float16,0,6.537354787190755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,128,1,float16,fp8,0,0.7570026715596517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,128,1,fp8,fp8,0,0.7057332992553711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,float16,0,3.410954793294271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,0,1,fp8,fp8,0,5.620586395263672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,64,0,1,float16,fp8,0,6.552752176920573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,float16,0,0.6573119958241781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,128,1,float16,fp8,0,0.6649279991785685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,0,1,fp8,fp8,0,2.958394686381022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,64,0,1,float16,fp8,0,3.427797317504883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,128,1,fp8,fp8,0,0.6052053372065226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,float16,0,0.6622186501820883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,float16,0,3.2954559326171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,128,1,float16,fp8,0,0.666650652885437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,0,1,fp8,fp8,0,2.849813461303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,64,0,1,float16,fp8,0,3.2985706329345703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,128,1,fp8,fp8,0,0.6115093231201172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,float16,0,0.6685173511505127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,float16,0,3.297343889872233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,128,1,float16,fp8,0,0.6750666300455729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,0,1,fp8,fp8,0,2.853658676147461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,64,0,1,float16,fp8,0,3.3154932657877603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,128,1,fp8,fp8,0,0.6187839905420939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,float16,0,0.6831519603729248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,float16,0,3.3173386255900064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,128,1,float16,fp8,0,0.6917920112609863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,0,1,fp8,fp8,0,2.86301326751709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,128,1,fp8,fp8,0,0.6363573471705118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,64,0,1,float16,fp8,0,3.313653310139974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,float16,0,0.4123893181482951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,float16,0,3.3285439809163413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,128,1,float16,fp8,0,0.42268800735473633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,float16,0,1.7883893648783367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,128,1,fp8,fp8,0,0.39822399616241455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,0,1,fp8,fp8,0,2.881045341491699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,64,0,1,float16,fp8,0,3.3404159545898438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,float16,0,0.3739146788914998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,128,1,float16,fp8,0,0.3772960106531779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,0,1,fp8,fp8,0,1.557978630065918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,64,0,1,float16,fp8,0,1.7991679509480794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,128,1,fp8,fp8,0,0.347109317779541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,float16,0,1.734026590983073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,float16,0,0.3744800090789795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,128,1,float16,fp8,0,0.37931732336680096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,0,1,fp8,fp8,0,1.5066506067911785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,64,0,1,float16,fp8,0,1.7321866353352864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,128,1,fp8,fp8,0,0.350874662399292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,float16,0,1.7359520594278972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,float16,0,0.3794826666514079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,0,1,fp8,fp8,0,1.5064160029093425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,64,0,1,float16,fp8,0,1.7363893191019695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,128,1,float16,fp8,0,0.3840906620025635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,128,1,fp8,fp8,0,0.35589865843455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,float16,0,1.7376799583435059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,float16,0,0.3863146702448527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,0,1,fp8,fp8,0,1.5168266296386719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,128,1,float16,fp8,0,0.39239998658498126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,64,0,1,float16,fp8,0,1.7407199541727703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,128,1,fp8,fp8,0,0.3644373416900635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,float16,0,1.751471996307373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,float16,0,0.29184534152348834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,128,1,float16,fp8,0,0.29051733016967773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,0,1,fp8,fp8,0,1.5222400029500325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,64,0,1,float16,fp8,0,1.7527893384297688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,float16,0,1.0198346773783367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,128,1,fp8,fp8,0,0.2733386754989624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,float16,0,0.2887093424797058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,0,1,float16,fp8,0,1.020016034444173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,64,0,1,fp8,fp8,0,0.8870986302693685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,128,1,float16,fp8,0,0.2893706758817037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,float16,0,1.0108799934387207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,128,1,fp8,fp8,0,0.2726080020268758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,float16,0,0.289792001247406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,0,1,float16,fp8,0,1.0049760341644287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,64,0,1,fp8,fp8,0,0.885637362798055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,128,1,float16,fp8,0,0.289792001247406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,float16,0,1.0085066954294841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,128,1,fp8,fp8,0,0.27114667495091754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,float16,0,0.2913279930750529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,0,1,float16,fp8,0,1.0093440214792888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,64,0,1,fp8,fp8,0,0.8827573458353678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,128,1,float16,fp8,0,0.2898079951604207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,float16,0,1.011034647623698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,128,1,fp8,fp8,0,0.2732693354288737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,float16,0,0.29202133417129517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,0,1,float16,fp8,0,1.0085439682006836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,64,0,1,fp8,fp8,0,0.8851786454518636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,128,1,fp8,fp8,0,0.2715733249982198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,128,1,float16,fp8,0,0.2919626633326213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,float16,0,1.0140000184377034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,0,1,fp8,fp8,0,0.8882986704508463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,64,0,1,float16,fp8,0,1.0141440232594807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,float16,0,0.9309386412302653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,128,1,float16,fp8,0,0.9418826897939047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,128,1,fp8,fp8,0,0.8502453168233236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,float16,0,3.8519681294759116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,float16,0,0.937269369761149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,0,1,fp8,fp8,0,3.318943977355957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,64,0,1,float16,fp8,0,3.8469813664754233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,128,1,fp8,fp8,0,0.8626240094502767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,128,1,float16,fp8,0,0.9465386867523193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,float16,0,0.9460960229237875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,float16,0,3.851231892903646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,128,1,float16,fp8,0,0.9582186539967855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,0,1,float16,fp8,0,3.857247988382975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,64,0,1,fp8,fp8,0,3.332490603129069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,128,1,fp8,fp8,0,0.8745600382486979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,float16,0,3.8684587478637695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,float16,0,0.9696853160858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,128,1,float16,fp8,0,0.9822506904602051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,0,1,fp8,fp8,0,3.34445858001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,64,0,1,float16,fp8,0,3.8774239222208657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,128,1,fp8,fp8,0,0.90448530515035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,float16,0,0.5582240025202433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,float16,0,3.899504025777181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,128,1,float16,fp8,0,0.5723093350728353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,0,1,fp8,fp8,0,3.3728853861490884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,64,0,1,float16,fp8,0,3.9173332850138345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,float16,0,2.065397262573242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,128,1,fp8,fp8,0,0.5335679848988851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,float16,0,0.4969013532002767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,128,1,float16,fp8,0,0.5005919933319092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,0,1,float16,fp8,0,2.074650605519613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,64,0,1,fp8,fp8,0,1.802720069885254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,float16,0,1.9827094078063965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,128,1,fp8,fp8,0,0.45766933759053546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,float16,0,0.5005173285802206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,128,1,float16,fp8,0,0.5055466492970785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,0,1,fp8,fp8,0,1.719823996225993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,64,0,1,float16,fp8,0,1.9834826787312825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,float16,0,1.9868906339009602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,128,1,fp8,fp8,0,0.4634133179982503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,float16,0,0.5048746665318807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,0,1,fp8,fp8,0,1.7226932843526204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,64,0,1,float16,fp8,0,1.9914132754007976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,128,1,float16,fp8,0,0.5111733277638754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,128,1,fp8,fp8,0,0.46967466672261554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,float16,0,1.9939732551574707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,float16,0,0.5158666769663492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,0,1,fp8,fp8,0,1.7325545946757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,64,0,1,float16,fp8,0,1.9999574025472004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,128,1,float16,fp8,0,0.5237706502278646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,128,1,fp8,fp8,0,0.4822346766789754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,float16,0,2.009610652923584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,float16,0,0.3144266605377197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,128,1,float16,fp8,0,0.32282666365305585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,float16,0,1.0974986553192139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,0,1,fp8,fp8,0,1.7437920570373535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,64,0,1,float16,fp8,0,2.0172425905863443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,128,1,fp8,fp8,0,0.3044373393058777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,float16,0,0.2832746704419454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,0,1,fp8,fp8,0,0.9643466472625732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,128,1,float16,fp8,0,0.2852960030237834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,64,0,1,float16,fp8,0,1.1064426898956299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,float16,0,1.0518666903177898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,128,1,fp8,fp8,0,0.2653226653734843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,float16,0,0.2840426762898763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,0,1,fp8,fp8,0,0.923695961634318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,64,0,1,float16,fp8,0,1.0558186372121174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,128,1,float16,fp8,0,0.2876799901326497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,128,1,fp8,fp8,0,0.26796799898147583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,float16,0,1.0575466950734456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,float16,0,0.288975993792216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,0,1,float16,fp8,0,1.058677355448405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,64,0,1,fp8,fp8,0,0.9263733228047689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,128,1,float16,fp8,0,0.2924319903055827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,float16,0,1.0612266858418782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,128,1,fp8,fp8,0,0.27086933453877765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,float16,0,0.29385600487391156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,0,1,float16,fp8,0,1.0652586619059246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,64,0,1,fp8,fp8,0,0.9273173014322916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,128,1,float16,fp8,0,0.2980159918467204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,float16,0,1.0684746901194255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,128,1,fp8,fp8,0,0.2795146703720093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,float16,0,0.222271998723348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,0,1,float16,fp8,0,1.0736479759216309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,float16,0,0.645248015721639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,128,1,float16,fp8,0,0.2225386699040731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,64,0,1,fp8,fp8,0,0.9357653458913168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,128,1,fp8,fp8,0,0.20971733331680298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,float16,0,0.21775466203689575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,0,1,fp8,fp8,0,0.567903995513916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,64,0,1,float16,fp8,0,0.6459093491236368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,128,1,float16,fp8,0,0.22017600138982138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,float16,0,0.6354506810506185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,128,1,fp8,fp8,0,0.20616000890731812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,float16,0,0.21911466121673584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,0,1,float16,fp8,0,0.6337173382441202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,64,0,1,fp8,fp8,0,0.560485323270162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,128,1,fp8,fp8,0,0.20774400234222412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,128,1,float16,fp8,0,0.21845867236455283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,float16,0,0.6342720190684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,float16,0,0.22000000874201456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,0,1,fp8,fp8,0,0.559994657834371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,64,0,1,float16,fp8,0,0.6347946723302206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,128,1,float16,fp8,0,0.22022932767868042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,128,1,fp8,fp8,0,0.2076693375905355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,float16,0,0.6363679965337118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,float16,0,0.22011200586954752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,0,1,float16,fp8,0,0.6383999983469645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,64,0,1,fp8,fp8,0,0.5619253317515055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,128,1,fp8,fp8,0,0.20784000555674234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,128,1,float16,fp8,0,0.22022932767868042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,float16,0,0.6387840112050375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,0,1,fp8,fp8,0,0.5655466715494791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,64,0,1,float16,fp8,0,0.6392693519592285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,float16,0,0.7770613034566244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,128,1,float16,fp8,0,0.7834506829579672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,128,1,fp8,fp8,0,0.7099839846293131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,float16,0,0.781381368637085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,float16,0,2.788309415181478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,0,1,fp8,fp8,0,2.4180213610331216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,128,1,float16,fp8,0,0.7903839747111002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,64,0,1,float16,fp8,0,2.7988001505533853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,128,1,fp8,fp8,0,0.717583974202474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,float16,0,2.801823933919271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,float16,0,0.7894079685211182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,0,1,fp8,fp8,0,2.4268479347229004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,64,0,1,float16,fp8,0,2.8033440907796225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,128,1,float16,fp8,0,0.798362652460734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,128,1,fp8,fp8,0,0.7282933394114176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,float16,0,0.8090506394704183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,float16,0,2.812405268351237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,0,1,fp8,fp8,0,2.4375999768575034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,64,0,1,float16,fp8,0,2.8155625661214194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,128,1,float16,fp8,0,0.8200426896413168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,128,1,fp8,fp8,0,0.7523252964019775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,float16,0,0.4686346848805745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,float16,0,2.834453264872233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,128,1,float16,fp8,0,0.47862935066223145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,0,1,fp8,fp8,0,2.4639412562052407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,64,0,1,float16,fp8,0,2.843423843383789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,128,1,fp8,fp8,0,0.4474666515986125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,float16,0,1.5173865954081218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,float16,0,0.4161279996236165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,0,1,float16,fp8,0,1.5294826825459797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,64,0,1,fp8,fp8,0,1.3290506998697917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,128,1,float16,fp8,0,0.4205973148345947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,float16,0,1.4484373728434246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,128,1,fp8,fp8,0,0.3847359816233317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,float16,0,0.4188479979832967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,0,1,fp8,fp8,0,1.261029322942098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,64,0,1,float16,fp8,0,1.451050599416097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,128,1,float16,fp8,0,0.4241066773732503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,128,1,fp8,fp8,0,0.3887733221054077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,float16,0,1.4522080421447754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,float16,0,0.4232960144678752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,0,1,fp8,fp8,0,1.2668906847635906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,64,0,1,float16,fp8,0,1.4537653923034668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,128,1,float16,fp8,0,0.43022934595743817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,float16,0,1.4593812624613445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,128,1,fp8,fp8,0,0.3949066797892253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,float16,0,0.4331680138905843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,0,1,float16,fp8,0,1.460645357767741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,64,0,1,fp8,fp8,0,1.270410696665446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,128,1,float16,fp8,0,0.4389653205871582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,float16,0,1.470666726430257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,128,1,fp8,fp8,0,0.4052213430404663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,float16,0,0.2632853388786316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,0,1,float16,fp8,0,1.4771626790364583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,float16,0,0.8147626717885336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,128,1,float16,fp8,0,0.2693279981613159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,64,0,1,fp8,fp8,0,1.2828853130340576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,128,1,fp8,fp8,0,0.2566879987716675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,float16,0,0.23430933554967245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,0,1,fp8,fp8,0,0.7200106779734293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,128,1,float16,fp8,0,0.2363146742184957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,64,0,1,float16,fp8,0,0.821066697438558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,128,1,fp8,fp8,0,0.22243734200795492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,float16,0,0.7744906743367513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,float16,0,0.2366186579068502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,0,1,fp8,fp8,0,0.6832533677419027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,64,0,1,float16,fp8,0,0.7778933048248291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,128,1,float16,fp8,0,0.23853333791097006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,128,1,fp8,fp8,0,0.2241013248761495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,float16,0,0.7773333390553793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,float16,0,0.24039467175801596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,0,1,float16,fp8,0,0.7815573215484619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,64,0,1,fp8,fp8,0,0.6853439807891846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,128,1,float16,fp8,0,0.24259734153747559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,float16,0,0.7814453442891439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,128,1,fp8,fp8,0,0.22818666696548462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,float16,0,0.24657066663106283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,0,1,float16,fp8,0,0.7836533387502035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,64,0,1,fp8,fp8,0,0.6887359619140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,128,1,float16,fp8,0,0.24909865856170654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,128,1,fp8,fp8,0,0.23456533749898276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,float16,0,0.7910079956054688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,float16,0,0.18939733505249023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,0,1,fp8,fp8,0,0.6957013607025146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,64,0,1,float16,fp8,0,0.7940906683603922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,float16,0,0.4896693229675293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,128,1,float16,fp8,0,0.18996800978978476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,128,1,fp8,fp8,0,0.17865065733591715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,float16,0,0.18526933590571085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,0,1,fp8,fp8,0,0.4321173429489136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,64,0,1,float16,fp8,0,0.49029866854349774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,128,1,float16,fp8,0,0.1852746605873108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,128,1,fp8,fp8,0,0.17536532878875732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,float16,0,0.4803680181503296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,float16,0,0.18634132544199625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,0,1,float16,fp8,0,0.47994665304819745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,64,0,1,fp8,fp8,0,0.42450133959452313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,128,1,fp8,fp8,0,0.17524266242980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,128,1,float16,fp8,0,0.1873226761817932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,float16,0,0.4803413152694702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,float16,0,0.1873226761817932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,0,1,fp8,fp8,0,0.4267520109812419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,64,0,1,float16,fp8,0,0.4802079995473226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,128,1,float16,fp8,0,0.18710933128992716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,128,1,fp8,fp8,0,0.17643733819325766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,float16,0,0.4819253285725911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,float16,0,0.1872746745745341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,0,1,fp8,fp8,0,0.425162672996521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,64,0,1,float16,fp8,0,0.4822133382161458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,128,1,fp8,fp8,0,0.17650665839513144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,128,1,float16,fp8,0,0.18758400281270346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,float16,0,0.483184019724528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,0,1,fp8,fp8,0,0.42953598499298096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,64,0,1,float16,fp8,0,0.48261332511901855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,float16,0,1.2123839855194092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,128,1,fp8,fp8,0,1.1030773321787517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,128,1,float16,fp8,0,1.2176427046457927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,float16,0,1.2259039878845215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,float16,0,3.7211891810099282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,0,1,fp8,fp8,0,3.2351306279500327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,128,1,float16,fp8,0,1.23963729540507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,64,0,1,float16,fp8,0,3.737423896789551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,128,1,fp8,fp8,0,1.1256852944691975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,float16,0,3.755253473917643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,float16,0,1.2382720311482747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,0,1,fp8,fp8,0,3.2538026173909507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,64,0,1,float16,fp8,0,3.759157180786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,128,1,float16,fp8,0,1.2505919933319092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,128,1,fp8,fp8,0,1.1390506426493328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,float16,0,3.771183967590332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,float16,0,1.2728053728739421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,0,1,fp8,fp8,0,3.2731145222981772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,64,0,1,float16,fp8,0,3.775925318400065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,128,1,float16,fp8,0,1.2876213391621907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,128,1,fp8,fp8,0,1.1805546283721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,float16,0,3.8145599365234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,float16,0,0.7081546783447266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,128,1,float16,fp8,0,0.7223412990570068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,0,1,fp8,fp8,0,3.3138081232706704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,64,0,1,float16,fp8,0,3.822277386983236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,128,1,fp8,fp8,0,0.6707519690195719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,float16,0,2.012826601664225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,float16,0,0.6237066586812338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,0,1,fp8,fp8,0,1.7705012957255046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,128,1,float16,fp8,0,0.6297173500061035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,64,0,1,float16,fp8,0,2.0271733601888022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,float16,0,1.9046613375345867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,128,1,fp8,fp8,0,0.5708906650543213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,float16,0,0.6284106572469076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,0,1,fp8,fp8,0,1.652773380279541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,64,0,1,float16,fp8,0,1.9088853200276692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,128,1,float16,fp8,0,0.6359253327051798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,128,1,fp8,fp8,0,0.5771146615346273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,float16,0,1.9112000465393066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,float16,0,0.6347200075785319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,0,1,fp8,fp8,0,1.6636212666829426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,64,0,1,float16,fp8,0,1.9173706372578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,128,1,float16,fp8,0,0.6419093211491903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,128,1,fp8,fp8,0,0.5851626793543497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,float16,0,1.9189173380533855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,float16,0,0.6491893529891968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,0,1,float16,fp8,0,1.9266133308410645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,64,0,1,fp8,fp8,0,1.6684373219807942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,128,1,float16,fp8,0,0.6600000063578287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,float16,0,1.93722136815389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,128,1,fp8,fp8,0,0.6027306715647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,float16,0,0.3778880039850871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,128,1,float16,fp8,0,0.38788266976674396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,float16,0,1.050485372543335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,0,1,fp8,fp8,0,1.6925066312154133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,64,0,1,float16,fp8,0,1.9468852678934734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,128,1,fp8,fp8,0,0.36137068271636963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,float16,0,0.33561599254608154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,0,1,fp8,fp8,0,0.9295840263366699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,64,0,1,float16,fp8,0,1.0602293014526367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,128,1,float16,fp8,0,0.33879466851552326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,float16,0,0.9936426480611166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,128,1,fp8,fp8,0,0.3122719923655192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,float16,0,0.33875731627146405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,0,1,float16,fp8,0,0.9982186953226725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,64,0,1,fp8,fp8,0,0.8724533716837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,128,1,float16,fp8,0,0.34113065401713055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,128,1,fp8,fp8,0,0.3163253267606099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,float16,0,0.9980800151824951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,float16,0,0.3428106705347697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,0,1,float16,fp8,0,1.0011520385742188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,64,0,1,fp8,fp8,0,0.8766826788584391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,128,1,float16,fp8,0,0.3473546504974365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,128,1,fp8,fp8,0,0.32025599479675293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,float16,0,1.0040533542633057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,float16,0,0.34916265805562335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,0,1,fp8,fp8,0,0.880517323811849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,64,0,1,float16,fp8,0,1.006325324376424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,128,1,float16,fp8,0,0.3552853266398112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,128,1,fp8,fp8,0,0.3285920023918152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,float16,0,1.0145760377248128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,float16,0,0.21702400843302408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,0,1,fp8,fp8,0,0.8894293308258057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,64,0,1,float16,fp8,0,1.0204799969991047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,128,1,float16,fp8,0,0.22189333041508993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,128,1,fp8,fp8,0,0.20989867051442465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,float16,0,0.5722080071767172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,float16,0,0.19075733423233032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,0,1,fp8,fp8,0,0.5110666751861572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,64,0,1,float16,fp8,0,0.5796159903208414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,128,1,float16,fp8,0,0.19344000021616617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,float16,0,0.5402986605962118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,128,1,fp8,fp8,0,0.1829599936803182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,float16,0,0.19270400206247965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,0,1,float16,fp8,0,0.541317343711853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,64,0,1,fp8,fp8,0,0.47810133298238117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,float16,0,0.5399413506189982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,128,1,fp8,fp8,0,0.18518932660420737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,128,1,float16,fp8,0,0.1946293314297994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,float16,0,0.19565333922704062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,0,1,float16,fp8,0,0.5446933507919312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,64,0,1,fp8,fp8,0,0.4787413279215495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,float16,0,0.5438880125681559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,128,1,fp8,fp8,0,0.18739734093348184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,128,1,float16,fp8,0,0.1973759929339091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,float16,0,0.20152000586191812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,0,1,float16,fp8,0,0.5461386839548746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,64,0,1,fp8,fp8,0,0.4825013478597005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,float16,0,0.5522666772206625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,128,1,float16,fp8,0,0.20423465967178345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,128,1,fp8,fp8,0,0.19154665867487589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,float16,0,0.15635733803113303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,0,1,float16,fp8,0,0.5551999807357788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,64,0,1,fp8,fp8,0,0.4894719918568929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,float16,0,0.3536213239034017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,128,1,float16,fp8,0,0.15599466363588968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,128,1,fp8,fp8,0,0.14665599664052328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,float16,0,0.15373333295186362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,0,1,float16,fp8,0,0.35153599580128986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,64,0,1,fp8,fp8,0,0.3144586682319641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,128,1,float16,fp8,0,0.1540000041325887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,float16,0,0.34572800000508624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,128,1,fp8,fp8,0,0.14428266882896423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,0,1,float16,fp8,0,0.34646932284037274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,float16,0,0.15386133392651877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,64,0,1,fp8,fp8,0,0.30820266405741376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,float16,0,0.3455359935760498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,128,1,float16,fp8,0,0.15335466464360556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,128,1,fp8,fp8,0,0.14283200105031332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,float16,0,0.1527253290017446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,0,1,float16,fp8,0,0.3473866780598958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,64,0,1,fp8,fp8,0,0.3071253299713135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,float16,0,0.3461173375447591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,128,1,float16,fp8,0,0.15270400047302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,128,1,fp8,fp8,0,0.14316266775131226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,0,1,float16,fp8,0,0.3471413453420003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,float16,0,0.15296000242233276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,64,0,1,fp8,fp8,0,0.3084160089492798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,float16,0,0.34745601812998456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,128,1,float16,fp8,0,0.15427199999491373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,128,1,fp8,fp8,0,0.1444000005722046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,0,1,float16,fp8,0,0.3472586472829183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,64,0,1,fp8,fp8,0,0.3091520071029663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,float16,0,0.9066080252329508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,128,1,float16,fp8,0,0.9124639828999838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,128,1,fp8,fp8,0,0.8239040374755859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,float16,0,2.298426628112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,float16,0,0.9143199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,0,1,float16,fp8,0,2.303040027618408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,64,0,1,fp8,fp8,0,1.9981279373168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,128,1,float16,fp8,0,0.9206986427307129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,128,1,fp8,fp8,0,0.8355840047200521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,float16,0,2.3102025985717773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,float16,0,0.9214293162027994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,0,1,fp8,fp8,0,2.0082079569498696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,64,0,1,float16,fp8,0,2.3129599889119468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,128,1,float16,fp8,0,0.9326240221659342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,128,1,fp8,fp8,0,0.8487093448638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,float16,0,2.3212159474690757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,float16,0,0.9462560017903646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,0,1,float16,fp8,0,2.3269707361857095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,64,0,1,fp8,fp8,0,2.0259626706441245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,128,1,float16,fp8,0,0.9554879665374756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,128,1,fp8,fp8,0,0.8778399626413981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,float16,0,2.3510986963907876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,float16,0,0.5343573490778605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,0,1,float16,fp8,0,2.358949343363444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,64,0,1,fp8,fp8,0,2.059669335683187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,float16,0,1.2641599973042805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,128,1,float16,fp8,0,0.5470399856567383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,128,1,fp8,fp8,0,0.507312019666036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,float16,0,0.4701919953028361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,0,1,float16,fp8,0,1.2749706904093425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,64,0,1,fp8,fp8,0,1.11844269434611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,128,1,float16,fp8,0,0.47463464736938477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,128,1,fp8,fp8,0,0.4333440065383911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,float16,0,1.1793866952260335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,float16,0,0.4761386712392171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,0,1,float16,fp8,0,1.1856906414031982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,64,0,1,fp8,fp8,0,1.0329279899597168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,128,1,float16,fp8,0,0.480618675549825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,float16,0,1.1869653065999348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,128,1,fp8,fp8,0,0.43932799498240155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,float16,0,0.4811946551005046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,0,1,float16,fp8,0,1.1906720002492268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,64,0,1,fp8,fp8,0,1.0373013019561768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,128,1,float16,fp8,0,0.48665066560109455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,128,1,fp8,fp8,0,0.44520533084869385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,float16,0,1.1957546869913738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,float16,0,0.49165332317352295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,0,1,float16,fp8,0,1.1974133650461833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,64,0,1,fp8,fp8,0,1.0423146883646648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,128,1,float16,fp8,0,0.49804266293843585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,float16,0,1.2094933191935222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,128,1,fp8,fp8,0,0.45708266894022626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,float16,0,0.28971733649571735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,float16,0,0.6679360071818033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,128,1,float16,fp8,0,0.29530133803685504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,0,1,float16,fp8,0,1.2152640024820964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,64,0,1,fp8,fp8,0,1.0586079756418865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,128,1,fp8,fp8,0,0.2769013245900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,float16,0,0.2546986738840739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,0,1,fp8,fp8,0,0.596010684967041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,64,0,1,float16,fp8,0,0.6744799613952637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,128,1,float16,fp8,0,0.2576533357302348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,float16,0,0.6216586828231812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,128,1,fp8,fp8,0,0.23871999979019165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,float16,0,0.2552746733029683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,0,1,float16,fp8,0,0.6239039897918701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,64,0,1,fp8,fp8,0,0.5516639947891235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,float16,0,0.6259680191675822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,128,1,float16,fp8,0,0.25844266017278034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,128,1,fp8,fp8,0,0.2407039999961853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,float16,0,0.2627306580543518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,0,1,float16,fp8,0,0.6285013357798258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,64,0,1,fp8,fp8,0,0.5535946687062582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,float16,0,0.6317813396453857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,128,1,float16,fp8,0,0.26499734322230023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,128,1,fp8,fp8,0,0.24458134174346924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,float16,0,0.26713067293167114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,0,1,float16,fp8,0,0.633786678314209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,64,0,1,fp8,fp8,0,0.558512012163798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,float16,0,0.6394986708958944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,128,1,float16,fp8,0,0.2715199987093608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,128,1,fp8,fp8,0,0.2510826587677002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,float16,0,0.16496533155441284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,0,1,float16,fp8,0,0.6450026830037435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,float16,0,0.3717706600824992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,64,0,1,fp8,fp8,0,0.5651040077209473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,128,1,float16,fp8,0,0.16910399993260702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,128,1,fp8,fp8,0,0.16099733114242554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,float16,0,0.14392000436782837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,0,1,fp8,fp8,0,0.3338079849878947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,64,0,1,float16,fp8,0,0.3754719893137614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,float16,0,0.34508268038431805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,128,1,float16,fp8,0,0.14460266629854837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,128,1,fp8,fp8,0,0.13580800096193948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,0,1,float16,fp8,0,0.3463253180185954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,float16,0,0.14670933286348978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,64,0,1,fp8,fp8,0,0.30408533414204914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,float16,0,0.34595731894175213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,128,1,float16,fp8,0,0.14622400204340616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,128,1,fp8,fp8,0,0.13821333646774292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,float16,0,0.14762133359909058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,0,1,float16,fp8,0,0.34727466106414795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,64,0,1,fp8,fp8,0,0.30641599496205646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,float16,0,0.34723734855651855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,128,1,float16,fp8,0,0.14892266194025675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,128,1,fp8,fp8,0,0.14244799812634787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,0,1,float16,fp8,0,0.34904531637827557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,float16,0,0.15057599544525146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,64,0,1,fp8,fp8,0,0.31177600224812824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,float16,0,0.3529226779937744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,128,1,float16,fp8,0,0.15345066785812378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,128,1,fp8,fp8,0,0.14822399616241455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,0,1,float16,fp8,0,0.3556906779607137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,float16,0,0.11678933103879292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,64,0,1,fp8,fp8,0,0.3167840043703715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,float16,0,0.23592533667882284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,128,1,float16,fp8,0,0.11770666639010112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,128,1,fp8,fp8,0,0.11165866255760193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,0,1,float16,fp8,0,0.23638399442036948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,float16,0,0.11633599797884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,64,0,1,fp8,fp8,0,0.2123946746190389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,float16,0,0.23188267151514688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,128,1,float16,fp8,0,0.11542399724324544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,128,1,fp8,fp8,0,0.11087999741236369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,float16,0,0.11546132961908977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,0,1,float16,fp8,0,0.23315733671188354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,64,0,1,fp8,fp8,0,0.20755199591318765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,128,1,float16,fp8,0,0.11571733156840007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,128,1,fp8,fp8,0,0.10940266648928325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,float16,0,0.23253333568572998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,float16,0,0.11566399534543355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,0,1,fp8,fp8,0,0.2079520026842753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,64,0,1,float16,fp8,0,0.23280000686645508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,128,1,float16,fp8,0,0.11633066336313884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,128,1,fp8,fp8,0,0.10967999696731567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,float16,0,0.23285865783691406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,0,1,fp8,fp8,0,0.20798399051030478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,float16,0,0.11591466267903645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,64,0,1,float16,fp8,0,0.23457600673039755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,128,1,float16,fp8,0,0.11637866497039795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,128,1,fp8,fp8,0,0.11014399925867717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,float16,0,0.23246399561564127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,0,1,float16,fp8,0,0.23437867561976114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,64,0,1,fp8,fp8,0,0.2083253264427185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,float16,0,1.1946933269500732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,128,1,float16,fp8,0,1.2049226760864258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,128,1,fp8,fp8,0,1.0869812965393066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,float16,0,2.3571413358052573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,0,1,float16,fp8,0,2.367157300313314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,float16,0,1.2136159737904866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,64,0,1,fp8,fp8,0,2.0672426223754883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,128,1,fp8,fp8,0,1.1106133460998535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,128,1,float16,fp8,0,1.2241493066151936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,float16,0,2.3820853233337402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,float16,0,1.2259360154469807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,0,1,float16,fp8,0,2.390175978342692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,64,0,1,fp8,fp8,0,2.0887093544006348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,128,1,float16,fp8,0,1.2358026504516602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,128,1,fp8,fp8,0,1.123471975326538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,float16,0,2.39629332224528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,float16,0,1.259226640065511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,0,1,float16,fp8,0,2.4037653605143228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,64,0,1,fp8,fp8,0,2.1010665893554688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,128,1,fp8,fp8,0,1.1640640099843342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,float16,0,2.440165360768636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,128,1,float16,fp8,0,1.2706773281097412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,float16,0,0.691765308380127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,float16,0,1.3143786589304607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,0,1,float16,fp8,0,2.445674737294515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,128,1,float16,fp8,0,0.7054826418558756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,64,0,1,fp8,fp8,0,2.1510027249654136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,128,1,fp8,fp8,0,0.6564160188039144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,0,1,float16,fp8,0,1.3269226551055908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,float16,0,0.6059360106786092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,64,0,1,fp8,fp8,0,1.1706773440043132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,float16,0,1.1979040304819744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,128,1,fp8,fp8,0,0.554693341255188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,128,1,float16,fp8,0,0.6117813189824423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,0,1,float16,fp8,0,1.2042187054951985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,float16,0,0.6133226553599039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,64,0,1,fp8,fp8,0,1.0518453121185303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,float16,0,1.2030773162841797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,128,1,float16,fp8,0,0.6212426821390787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,128,1,fp8,fp8,0,0.5620160102844238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,float16,0,0.6194186607996622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,0,1,float16,fp8,0,1.2103520234425862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,64,0,1,fp8,fp8,0,1.0585813522338867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,float16,0,1.2107253074645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,128,1,float16,fp8,0,0.624016006787618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,128,1,fp8,fp8,0,0.5687786738077799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,float16,0,0.6318186521530151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,0,1,float16,fp8,0,1.2178666591644287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,64,0,1,fp8,fp8,0,1.0674453576405842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,float16,0,1.2319626808166504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,128,1,fp8,fp8,0,0.5866453250249227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,128,1,float16,fp8,0,0.6410773197809855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,float16,0,0.3614399830500285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,0,1,float16,fp8,0,1.2399146556854248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,float16,0,0.6820267041524252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,64,0,1,fp8,fp8,0,1.0889653364817302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,128,1,float16,fp8,0,0.370192011197408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,128,1,fp8,fp8,0,0.3449920018513997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,float16,0,0.3183679978052775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,0,1,float16,fp8,0,0.692517360051473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,64,0,1,fp8,fp8,0,0.6105013291041056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,float16,0,0.6214026610056559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,128,1,float16,fp8,0,0.32064000765482586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,128,1,fp8,fp8,0,0.29587199290593463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,0,1,float16,fp8,0,0.6252853473027548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,float16,0,0.3223946690559387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,64,0,1,fp8,fp8,0,0.5525386730829874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,float16,0,0.6254666646321615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,128,1,float16,fp8,0,0.3247999946276347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,128,1,fp8,fp8,0,0.301146666208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,0,1,float16,fp8,0,0.6289173364639282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,float16,0,0.3267093300819397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,64,0,1,fp8,fp8,0,0.5554453531901041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,float16,0,0.6312319835027059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,128,1,float16,fp8,0,0.3307039936383565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,128,1,fp8,fp8,0,0.3038453261057536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,0,1,float16,fp8,0,0.6352426608403524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,float16,0,0.3330346743265788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,64,0,1,fp8,fp8,0,0.5605653524398804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,128,1,float16,fp8,0,0.3389279842376709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,128,1,fp8,fp8,0,0.31034666299819946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,float16,0,0.6413919925689697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,float16,0,0.19882667064666748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,0,1,fp8,fp8,0,0.5705386797587076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,64,0,1,float16,fp8,0,0.6457706689834595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,128,1,float16,fp8,0,0.20376533269882202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,128,1,fp8,fp8,0,0.19157866636912027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,float16,0,0.3676053285598755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,float16,0,0.1728586753209432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,0,1,fp8,fp8,0,0.3310133417447408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,64,0,1,float16,fp8,0,0.3727253278096517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,128,1,float16,fp8,0,0.17293334007263184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,float16,0,0.33262399832407635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,128,1,fp8,fp8,0,0.16291200121243796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,float16,0,0.17274133364359537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,0,1,float16,fp8,0,0.3333280086517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,64,0,1,fp8,fp8,0,0.2999253273010254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,128,1,float16,fp8,0,0.17672000328699747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,128,1,fp8,fp8,0,0.1657919983069102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,float16,0,0.3332266608874003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,float16,0,0.1758133371671041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,0,1,fp8,fp8,0,0.3012746572494507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,64,0,1,float16,fp8,0,0.3364693323771159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,128,1,float16,fp8,0,0.1797599991162618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,128,1,fp8,fp8,0,0.16912533839543661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,float16,0,0.3389333486557007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,float16,0,0.1832586725552877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,0,1,fp8,fp8,0,0.3059626619021098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,64,0,1,float16,fp8,0,0.3413013219833374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,128,1,float16,fp8,0,0.18525334199269614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,128,1,fp8,fp8,0,0.17494400342305502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,float16,0,0.3463573455810547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,float16,0,0.11760532855987549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,0,1,fp8,fp8,0,0.31244800488154095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,64,0,1,float16,fp8,0,0.3466399908065796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,float16,0,0.2079360087712606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,128,1,float16,fp8,0,0.11819199721018474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,128,1,fp8,fp8,0,0.11552533507347107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,float16,0,0.10332266489664714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,0,1,fp8,fp8,0,0.19154133399327597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,float16,0,0.19344000021616617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,64,0,1,float16,fp8,0,0.21173866589864096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,128,1,float16,fp8,0,0.10441066821416219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,128,1,fp8,fp8,0,0.09479467074076335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,0,1,float16,fp8,0,0.1922453244527181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,64,0,1,fp8,fp8,0,0.17045332988103232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,float16,0,0.10410666465759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,float16,0,0.1930453379948934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,128,1,float16,fp8,0,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,128,1,fp8,fp8,0,0.09578667084376018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,0,1,float16,fp8,0,0.193615992863973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,float16,0,0.10317333539326985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,64,0,1,fp8,fp8,0,0.1713386575380961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,128,1,float16,fp8,0,0.1051680048306783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,float16,0,0.19408533970514932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,128,1,fp8,fp8,0,0.09717333316802979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,0,1,float16,fp8,0,0.19538666804631552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,64,0,1,fp8,fp8,0,0.17199466625849405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,float16,0,0.10559999942779541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,float16,0,0.19748266537984213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,128,1,float16,fp8,0,0.1074079970518748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,128,1,fp8,fp8,0,0.10109866658846538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,float16,0,0.08332266906897227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,0,1,float16,fp8,0,0.1974560022354126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,64,0,1,fp8,fp8,0,0.17916800578435263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,float16,0,0.13994666934013367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,128,1,float16,fp8,0,0.08470933636029561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,128,1,fp8,fp8,0,0.08098666866620381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,0,1,float16,fp8,0,0.1395146648089091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,64,0,1,fp8,fp8,0,0.12585600217183432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,float16,0,0.08453333377838135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,128,1,float16,fp8,0,0.08468799789746602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,float16,0,0.1381280024846395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,128,1,fp8,fp8,0,0.07897066573301952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,0,1,float16,fp8,0,0.1388800044854482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,64,0,1,fp8,fp8,0,0.1251146694024404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,float16,0,0.08322666585445404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,float16,0,0.13858667016029358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,128,1,float16,fp8,0,0.08458667000134786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,128,1,fp8,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,0,1,float16,fp8,0,0.1381439963976542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,64,0,1,fp8,fp8,0,0.12596266468365988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,float16,0,0.082805335521698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,float16,0,0.13824533422787985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,128,1,float16,fp8,0,0.08281066517035167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,128,1,fp8,fp8,0,0.0807360013326009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,0,1,float16,fp8,0,0.13854933778444925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,64,0,1,fp8,fp8,0,0.12437333663304646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,float16,0,0.0846666693687439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,float16,0,0.14014400045077005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,128,1,float16,fp8,0,0.08458667000134786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,128,1,fp8,fp8,0,0.08073066671689351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,0,1,float16,fp8,0,0.1400373379389445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,64,0,1,fp8,fp8,0,0.12582932909329733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,float16,0,0.8961226940155029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,128,1,float16,fp8,0,0.9033599694569906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,float16,0,1.513045310974121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,128,1,fp8,fp8,0,0.8129279613494873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,0,1,float16,fp8,0,1.5180266698201497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,float16,0,0.9059786796569824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,64,0,1,fp8,fp8,0,1.3316853046417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,128,1,float16,fp8,0,0.9132586320241293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,128,1,fp8,fp8,0,0.826416015625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,float16,0,1.5240426063537598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,float16,0,0.913653294245402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,0,1,fp8,fp8,0,1.3456746737162273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,64,0,1,float16,fp8,0,1.533189296722412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,128,1,float16,fp8,0,0.9226240317026774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,128,1,fp8,fp8,0,0.8373760382334391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,float16,0,1.5373493830362956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,float16,0,0.9360106786092123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,0,1,fp8,fp8,0,1.3558400472005208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,64,0,1,float16,fp8,0,1.543882687886556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,float16,0,1.5597920417785645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,128,1,float16,fp8,0,0.9452160199483236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,128,1,fp8,fp8,0,0.8630879720052084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,float16,0,0.5220640103022257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,0,1,float16,fp8,0,1.5709865887959797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,float16,0,0.8535573482513428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,128,1,float16,fp8,0,0.5337866544723511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,64,0,1,fp8,fp8,0,1.392240047454834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,128,1,fp8,fp8,0,0.4946133295694987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,float16,0,0.45795198281606037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,0,1,fp8,fp8,0,0.7715093294779459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,64,0,1,float16,fp8,0,0.8647147019704183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,float16,0,0.7726986408233643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,128,1,float16,fp8,0,0.46276267369588214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,128,1,fp8,fp8,0,0.421125332514445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,0,1,float16,fp8,0,0.7775839964548746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,float16,0,0.46386667092641193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,64,0,1,fp8,fp8,0,0.6860906283060709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,float16,0,0.7804373105367025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,128,1,fp8,fp8,0,0.42685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,128,1,float16,fp8,0,0.46820799509684247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,0,1,float16,fp8,0,0.7843626340230306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,float16,0,0.4700746536254883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,64,0,1,fp8,fp8,0,0.6914933522542318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,float16,0,0.7848853270212809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,128,1,float16,fp8,0,0.4742826620737712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,128,1,fp8,fp8,0,0.43271998564402264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,0,1,float16,fp8,0,0.7910186449686686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,float16,0,0.4784586826960246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,64,0,1,fp8,fp8,0,0.6974720160166422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,float16,0,0.7986613114674886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,128,1,float16,fp8,0,0.4845920006434123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,128,1,fp8,fp8,0,0.44345064957936603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,0,1,float16,fp8,0,0.8039733568827311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,float16,0,0.2757866581281026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,float16,0,0.44857601324717206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,64,0,1,fp8,fp8,0,0.7115200360616049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,128,1,fp8,fp8,0,0.2640586694081624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,128,1,float16,fp8,0,0.2834080060323079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,0,1,float16,fp8,0,0.4554400046666463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,float16,0,0.24060799678166708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,64,0,1,fp8,fp8,0,0.40700801213582355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,float16,0,0.4040213425954183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,128,1,float16,fp8,0,0.24263999859491983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,128,1,fp8,fp8,0,0.22628267606099448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,0,1,float16,fp8,0,0.4079093138376872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,64,0,1,fp8,fp8,0,0.3646879990895589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,float16,0,0.24406399329503378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,float16,0,0.40788265069325763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,128,1,float16,fp8,0,0.24655999739964804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,128,1,fp8,fp8,0,0.22889065742492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,0,1,float16,fp8,0,0.4103519916534424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,float16,0,0.24779200553894043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,64,0,1,fp8,fp8,0,0.3677599827448527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,float16,0,0.41282133261362713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,128,1,float16,fp8,0,0.2508106629053752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,128,1,fp8,fp8,0,0.23204267024993896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,0,1,float16,fp8,0,0.41654400030771893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,64,0,1,fp8,fp8,0,0.37136534849802655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,float16,0,0.2532106637954712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,float16,0,0.4208853244781494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,128,1,float16,fp8,0,0.25912533203760785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,128,1,fp8,fp8,0,0.23841599623362222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,float16,0,0.15237333377202353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,0,1,float16,fp8,0,0.42341868082682294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,64,0,1,fp8,fp8,0,0.37985066572825116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,float16,0,0.24463466803232828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,128,1,float16,fp8,0,0.15657599767049155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,128,1,fp8,fp8,0,0.1482133368651072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,0,1,float16,fp8,0,0.2504319945971171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,64,0,1,fp8,fp8,0,0.22616533438364664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,float16,0,0.12938666343688965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,float16,0,0.21894399325052896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,128,1,float16,fp8,0,0.1318880021572113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,128,1,fp8,fp8,0,0.12174399693806966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,0,1,float16,fp8,0,0.22082134087880453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,64,0,1,fp8,fp8,0,0.1969546675682068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,float16,0,0.12982400258382162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,float16,0,0.21896533171335855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,128,1,float16,fp8,0,0.13195733229319254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,128,1,fp8,fp8,0,0.12427733341852824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,0,1,float16,fp8,0,0.22129599253336588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,64,0,1,fp8,fp8,0,0.1998293399810791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,float16,0,0.13246933619181314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,float16,0,0.22207999229431152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,128,1,float16,fp8,0,0.13430933157602945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,128,1,fp8,fp8,0,0.12809066971143088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,0,1,float16,fp8,0,0.22482667366663614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,64,0,1,fp8,fp8,0,0.20381333430608115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,float16,0,0.13660266995429993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,128,1,float16,fp8,0,0.1405173341433207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,128,1,fp8,fp8,0,0.13386666774749756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,float16,0,0.2281013329823812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,float16,0,0.08707200487454732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,0,1,float16,fp8,0,0.23035200436909994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,64,0,1,fp8,fp8,0,0.2102186679840088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,float16,0,0.14074132839838663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,128,1,float16,fp8,0,0.08780800302823384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,128,1,fp8,fp8,0,0.08852266271909077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,0,1,float16,fp8,0,0.144186665614446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,64,0,1,fp8,fp8,0,0.13192533453305563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,float16,0,0.07899199922879536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,128,1,float16,fp8,0,0.0799786647160848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,float16,0,0.13241599996884665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,128,1,fp8,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,float16,0,0.07899199922879536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,0,1,float16,fp8,0,0.13381866614023843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,64,0,1,fp8,fp8,0,0.11799466609954834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,128,1,float16,fp8,0,0.0804746647675832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,128,1,fp8,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,float16,0,0.132560004790624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,float16,0,0.0802293320496877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,0,1,float16,fp8,0,0.13219733039538065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,64,0,1,fp8,fp8,0,0.11964266498883565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,128,1,float16,fp8,0,0.08044800162315369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,128,1,fp8,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,float16,0,0.13330666224161783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,0,1,float16,fp8,0,0.13450666268666586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,64,0,1,fp8,fp8,0,0.11970133582750957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,float16,0,0.08078933258851369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,128,1,float16,fp8,0,0.08166400094827016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,float16,0,0.13403733571370444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,128,1,fp8,fp8,0,0.07881066699822743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,float16,0,0.06452266871929169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,0,1,float16,fp8,0,0.13483200470606485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,64,0,1,fp8,fp8,0,0.12168000141779582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,128,1,float16,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,128,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,float16,0,0.09945600231488545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,0,1,float16,fp8,0,0.10132799545923869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,float16,0,0.06467733283837636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,64,0,1,fp8,fp8,0,0.09128000338872273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,128,1,float16,fp8,0,0.06436266501744588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,128,1,fp8,fp8,0,0.06250133117039998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,float16,0,0.1011946698029836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,float16,0,0.06550399959087372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,0,1,fp8,fp8,0,0.09085866808891296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,64,0,1,float16,fp8,0,0.10097066561381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,float16,0,0.10134399930636089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,128,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,128,1,float16,fp8,0,0.06436266501744588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,0,1,float16,fp8,0,0.1011786659558614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,float16,0,0.06465066472689311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,64,0,1,fp8,fp8,0,0.09297066926956177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,float16,0,0.1011306643486023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,128,1,float16,fp8,0,0.06486933430035909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,128,1,fp8,fp8,0,0.06205333272616068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,float16,0,0.0658240020275116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,0,1,float16,fp8,0,0.10115200281143188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,float16,0,0.10132267077763875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,64,0,1,fp8,fp8,0,0.09117333094278972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,128,1,float16,fp8,0,0.06488533318042755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,128,1,fp8,fp8,0,0.06201066573460897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,0,1,float16,fp8,0,0.10141332944234212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,64,0,1,fp8,fp8,0,0.09125866492589314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,128,1,fp8,fp8,0,1.089781363805135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,float16,0,1.2182933489481609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,128,1,float16,fp8,0,1.214901367823283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,float16,0,1.693610668182373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,float16,0,1.2384426593780518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,0,1,fp8,fp8,0,1.489151954650879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,64,0,1,float16,fp8,0,1.6921119689941406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,float16,0,1.7157173156738281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,128,1,float16,fp8,0,1.2381493250528972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,128,1,fp8,fp8,0,1.1063253084818523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,0,1,float16,fp8,0,1.7133386929829915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,64,0,1,fp8,fp8,0,1.502336025238037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,float16,0,1.2600213686625164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,float16,0,1.7442879676818848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,128,1,float16,fp8,0,1.251519997914632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,128,1,fp8,fp8,0,1.116634686787923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,0,1,float16,fp8,0,1.7317280769348145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,64,0,1,fp8,fp8,0,1.5140746434529622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,float16,0,1.2848693529764812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,float16,0,1.7738720575968425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,128,1,float16,fp8,0,1.280453364054362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,128,1,fp8,fp8,0,1.154368003209432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,0,1,float16,fp8,0,1.7679039637247722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,float16,0,0.6926186879475912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,float16,0,0.9530560175577799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,64,0,1,fp8,fp8,0,1.5622132619222004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,128,1,float16,fp8,0,0.7008106708526611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,128,1,fp8,fp8,0,0.6516586542129517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,0,1,float16,fp8,0,0.9605440298716227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,64,0,1,fp8,fp8,0,0.8680373032887777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,float16,0,0.6032426754633585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,float16,0,0.8447573184967041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,128,1,float16,fp8,0,0.6068053245544434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,128,1,fp8,fp8,0,0.5485440095265707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,0,1,fp8,fp8,0,0.7488586902618408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,float16,0,0.6114879846572876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,64,0,1,float16,fp8,0,0.8492533365885416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,128,1,float16,fp8,0,0.6164906819661459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,128,1,fp8,fp8,0,0.5561866760253906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,float16,0,0.8525439898173014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,0,1,fp8,fp8,0,0.7589866320292155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,float16,0,0.6162506739298502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,64,0,1,float16,fp8,0,0.8591519991556803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,128,1,fp8,fp8,0,0.5633813142776489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,128,1,float16,fp8,0,0.6216373443603516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,float16,0,0.8616159756978353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,0,1,fp8,fp8,0,0.7663626670837402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,float16,0,0.6321920156478882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,64,0,1,float16,fp8,0,0.8663893540700277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,128,1,fp8,fp8,0,0.5818880001703898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,128,1,float16,fp8,0,0.6381173531214396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,float16,0,0.8778186639149984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,0,1,float16,fp8,0,0.8850506941477457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,float16,0,0.35708268483479816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,float16,0,0.49032533168792725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,64,0,1,fp8,fp8,0,0.7875999609629313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,128,1,float16,fp8,0,0.36393598715464276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,128,1,fp8,fp8,0,0.33854401111602783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,0,1,float16,fp8,0,0.4980959892272949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,64,0,1,fp8,fp8,0,0.4497386614481608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,float16,0,0.31139200925827026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,float16,0,0.43518932660420734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,128,1,float16,fp8,0,0.3144479990005493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,128,1,fp8,fp8,0,0.2874293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,0,1,float16,fp8,0,0.43906132380167645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,64,0,1,fp8,fp8,0,0.39285866419474286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,float16,0,0.31306666135787964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,float16,0,0.4379893143971761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,128,1,fp8,fp8,0,0.29233600695927936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,128,1,float16,fp8,0,0.3174186746279399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,0,1,float16,fp8,0,0.4416319926579793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,64,0,1,fp8,fp8,0,0.39795732498168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,float16,0,0.3186826705932617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,float16,0,0.44439999262491864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,128,1,float16,fp8,0,0.32279467582702637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,128,1,fp8,fp8,0,0.2956800063451131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,0,1,float16,fp8,0,0.44846399625142414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,64,0,1,fp8,fp8,0,0.4004480044047038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,float16,0,0.3258613348007202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,float16,0,0.45533867677052814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,128,1,float16,fp8,0,0.33130667606989544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,128,1,fp8,fp8,0,0.3044053316116333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,float16,0,0.1914773384730021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,0,1,float16,fp8,0,0.45787731806437176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,64,0,1,fp8,fp8,0,0.41222933928171795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,float16,0,0.2616106669108073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,128,1,fp8,fp8,0,0.18318400780359903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,128,1,float16,fp8,0,0.19555733601252237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,0,1,float16,fp8,0,0.26555200417836505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,64,0,1,fp8,fp8,0,0.24278400341669717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,float16,0,0.16220266620318094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,float16,0,0.22938666741053262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,128,1,float16,fp8,0,0.16476800044377646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,128,1,fp8,fp8,0,0.15646933515866598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,0,1,float16,fp8,0,0.23094399770100912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,64,0,1,fp8,fp8,0,0.21215466658274332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,float16,0,0.16339199741681418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,float16,0,0.22921067476272583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,128,1,float16,fp8,0,0.16787733634312949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,128,1,fp8,fp8,0,0.15811733404795328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,0,1,float16,fp8,0,0.2323946754137675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,float16,0,0.16805867354075113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,64,0,1,fp8,fp8,0,0.21215999126434326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,float16,0,0.23357866207758585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,128,1,float16,fp8,0,0.16963734229405722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,128,1,fp8,fp8,0,0.15972800056139627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,0,1,float16,fp8,0,0.2363306681315104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,64,0,1,fp8,fp8,0,0.2161173423131307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,float16,0,0.1750026742617289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,float16,0,0.2421440084775289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,128,1,float16,fp8,0,0.17725332578023276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,128,1,fp8,fp8,0,0.16476800044377646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,0,1,float16,fp8,0,0.24399999777475992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,float16,0,0.10769599676132202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,float16,0,0.144896000623703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,64,0,1,fp8,fp8,0,0.2221440076828003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,128,1,float16,fp8,0,0.11009066303571065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,128,1,fp8,fp8,0,0.10543466607729594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,0,1,float16,fp8,0,0.14824000000953674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,64,0,1,fp8,fp8,0,0.13637866576512656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,float16,0,0.09310400485992432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,float16,0,0.1283466617266337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,128,1,float16,fp8,0,0.09520000219345093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,128,1,fp8,fp8,0,0.08557867010434468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,0,1,float16,fp8,0,0.12990933656692505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,float16,0,0.09342933694521587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,64,0,1,fp8,fp8,0,0.11571199695269267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,float16,0,0.13082133730252585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,128,1,float16,fp8,0,0.09505599737167358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,128,1,fp8,fp8,0,0.08532800277074178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,0,1,float16,fp8,0,0.13194132844607034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,float16,0,0.09404800335566203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,64,0,1,fp8,fp8,0,0.11789333820343018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,float16,0,0.1299626628557841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,128,1,float16,fp8,0,0.09722666939099629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,128,1,fp8,fp8,0,0.08777599533398946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,0,1,float16,fp8,0,0.13191999991734824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,float16,0,0.09734933574994405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,64,0,1,fp8,fp8,0,0.11926399668057759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,float16,0,0.13243200381596884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,128,1,float16,fp8,0,0.09955199559529622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,128,1,fp8,fp8,0,0.09124267101287842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,0,1,float16,fp8,0,0.13571199774742126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,float16,0,0.06348800162474315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,64,0,1,fp8,fp8,0,0.12361600001653035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,float16,0,0.08702400326728821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,128,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,128,1,fp8,fp8,0,0.06201600035031637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,0,1,float16,fp8,0,0.0888426701227824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,float16,0,0.058261334896087646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,64,0,1,fp8,fp8,0,0.08083199958006541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,float16,0,0.08282133440176646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,128,1,float16,fp8,0,0.05978666742642721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,128,1,fp8,fp8,0,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,0,1,float16,fp8,0,0.08338133494059245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,64,0,1,fp8,fp8,0,0.075162669022878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,float16,0,0.059994667768478394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,float16,0,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,128,1,float16,fp8,0,0.06057600180308024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,128,1,fp8,fp8,0,0.05559466779232025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,0,1,float16,fp8,0,0.08294933537642162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,64,0,1,fp8,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,float16,0,0.05892266829808553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,float16,0,0.08273066580295563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,128,1,float16,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,0,1,float16,fp8,0,0.08281599978605907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,64,0,1,fp8,fp8,0,0.07523199915885925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,float16,0,0.06033066908518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,float16,0,0.08483733733495076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,128,1,float16,fp8,0,0.060831998785336815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,128,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,0,1,float16,fp8,0,0.08549333612124126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,64,0,1,fp8,fp8,0,0.07706133524576823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,float16,0,0.05208533505598704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,float16,0,0.06646933158238728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,128,1,float16,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,128,1,fp8,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,0,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,64,0,1,fp8,fp8,0,0.06065066655476888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,float16,0,0.05217066903909048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,float16,0,0.0669653316338857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,128,1,float16,fp8,0,0.051685333251953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,128,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,0,1,float16,fp8,0,0.06723733246326447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,64,0,1,fp8,fp8,0,0.06046399970849355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,float16,0,0.05119466781616211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,float16,0,0.06713599960009257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,128,1,float16,fp8,0,0.05193066596984863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,128,1,fp8,fp8,0,0.047930667797724404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,0,1,float16,fp8,0,0.06713599960009257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,64,0,1,fp8,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,float16,0,0.05082666873931885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,float16,0,0.06607466439406078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,128,1,float16,fp8,0,0.05203733344872793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,128,1,fp8,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,0,1,float16,fp8,0,0.06642666459083557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,64,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,float16,0,0.05190933247407278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,float16,0,0.06604266663392384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,128,1,float16,fp8,0,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,128,1,fp8,fp8,0,0.04785066843032837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,0,1,float16,fp8,0,0.06643733382225037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,64,0,1,fp8,fp8,0,0.06055466830730438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,float16,0,0.8895839850107828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,128,1,float16,fp8,0,0.8972586790720621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,128,1,fp8,fp8,0,0.8078880310058594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,float16,0,1.1246079603830974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,0,1,float16,fp8,0,1.1281867027282715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,64,0,1,fp8,fp8,0,1.001578648885091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,float16,0,0.9050133228302002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,float16,0,1.1410773595174153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,128,1,fp8,fp8,0,0.8204373518625895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,128,1,float16,fp8,0,0.9096852938334147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,0,1,float16,fp8,0,1.1427679856618245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,64,0,1,fp8,fp8,0,1.0149226983388264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,float16,0,0.9194826285044352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,float16,0,1.1544746557871501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,128,1,fp8,fp8,0,0.8296159903208414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,128,1,float16,fp8,0,0.9188266595204672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,0,1,float16,fp8,0,1.155898650487264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,float16,0,0.9338133335113525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,64,0,1,fp8,fp8,0,1.0231626828511555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,float16,0,1.1758453051249187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,128,1,float16,fp8,0,0.9407626787821451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,128,1,fp8,fp8,0,0.8535892963409424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,0,1,float16,fp8,0,1.1791626612345378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,float16,0,0.5193813244501749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,float16,0,0.6494239966074625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,64,0,1,fp8,fp8,0,1.0518826643625896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,128,1,float16,fp8,0,0.5293600161870321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,128,1,fp8,fp8,0,0.49325335025787354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,0,1,float16,fp8,0,0.6609333356221517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,64,0,1,fp8,fp8,0,0.6000426610310873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,float16,0,0.45208533604939777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,float16,0,0.5715039968490601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,128,1,float16,fp8,0,0.45578134059906006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,128,1,fp8,fp8,0,0.41371198495229083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,0,1,float16,fp8,0,0.5758986473083496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,64,0,1,fp8,fp8,0,0.513157327969869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,float16,0,0.45745599269866943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,128,1,float16,fp8,0,0.4628106753031413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,float16,0,0.5787733395894369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,128,1,fp8,fp8,0,0.4200906753540039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,0,1,float16,fp8,0,0.5827626784642538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,64,0,1,fp8,fp8,0,0.519594669342041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,float16,0,0.46240532398223877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,128,1,float16,fp8,0,0.4682613213857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,128,1,fp8,fp8,0,0.42707733313242596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,float16,0,0.583893338839213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,float16,0,0.47360531489054364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,0,1,fp8,fp8,0,0.5259146690368652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,64,0,1,float16,fp8,0,0.5898933410644531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,float16,0,0.5958773295084635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,128,1,float16,fp8,0,0.478767991065979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,128,1,fp8,fp8,0,0.4366613229115804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,0,1,float16,fp8,0,0.6031359831492106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,float16,0,0.2711679935455322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,float16,0,0.33907198905944824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,128,1,float16,fp8,0,0.2774933377901713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,64,0,1,fp8,fp8,0,0.5399413506189982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,128,1,fp8,fp8,0,0.258735994497935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,0,1,float16,fp8,0,0.3464266856511434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,64,0,1,fp8,fp8,0,0.3147253394126892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,float16,0,0.2346186637878418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,float16,0,0.29601067304611206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,128,1,float16,fp8,0,0.23593600591023764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,128,1,fp8,fp8,0,0.21870932976404825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,0,1,float16,fp8,0,0.29790399471918744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,64,0,1,fp8,fp8,0,0.2712159951527913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,float16,0,0.23564799626668295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,float16,0,0.29819732904434204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,128,1,float16,fp8,0,0.2375040054321289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,128,1,fp8,fp8,0,0.22139199574788412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,0,1,float16,fp8,0,0.30105066299438477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,64,0,1,fp8,fp8,0,0.2748533288637797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,float16,0,0.24116265773773193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,float16,0,0.30311999718348187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,128,1,float16,fp8,0,0.24321067333221436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,128,1,fp8,fp8,0,0.22618132829666138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,0,1,float16,fp8,0,0.3071146607398987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,64,0,1,fp8,fp8,0,0.27797865867614746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,float16,0,0.24687467018763223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,float16,0,0.31223465998967487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,128,1,float16,fp8,0,0.25172799825668335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,128,1,fp8,fp8,0,0.23247466484705606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,0,1,float16,fp8,0,0.31435734033584595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,64,0,1,fp8,fp8,0,0.28472532828648883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,float16,0,0.14614933729171753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,float16,0,0.1829920013745626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,128,1,float16,fp8,0,0.14909332990646362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,128,1,fp8,fp8,0,0.14219733079274496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,0,1,float16,fp8,0,0.18707732359568277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,64,0,1,fp8,fp8,0,0.17073599497477213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,float16,0,0.12306132912635803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,float16,0,0.15714133779207864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,128,1,float16,fp8,0,0.12385066350301106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,128,1,fp8,fp8,0,0.11401599645614624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,0,1,float16,fp8,0,0.1586079994837443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,64,0,1,fp8,fp8,0,0.1442506710688273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,float16,0,0.12343466281890869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,float16,0,0.15680533647537231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,128,1,float16,fp8,0,0.12596266468365988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,128,1,fp8,fp8,0,0.11751466989517212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,0,1,fp8,fp8,0,0.14631999532381693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,64,0,1,float16,fp8,0,0.15944000085194907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,float16,0,0.1267306705315908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,float16,0,0.15966932972272238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,128,1,float16,fp8,0,0.12804266810417175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,128,1,fp8,fp8,0,0.12198932965596516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,0,1,float16,fp8,0,0.16191466649373373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,64,0,1,fp8,fp8,0,0.15051733454068503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,float16,0,0.13074666261672974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,float16,0,0.16528532902399698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,128,1,float16,fp8,0,0.13223466277122498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,128,1,fp8,fp8,0,0.12780800461769104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,0,1,float16,fp8,0,0.16837332646052042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,64,0,1,fp8,fp8,0,0.15531200170516968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,float16,0,0.08069866895675659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,float16,0,0.10111467043558757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,128,1,float16,fp8,0,0.08212799827257793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,128,1,fp8,fp8,0,0.08108800152937572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,0,1,float16,fp8,0,0.1035040020942688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,64,0,1,fp8,fp8,0,0.09944533308347066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,float16,0,0.07082666456699371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,float16,0,0.09240000446637471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,128,1,float16,fp8,0,0.0724480003118515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,128,1,fp8,fp8,0,0.06608533362547557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,0,1,float16,fp8,0,0.09291733304659526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,64,0,1,fp8,fp8,0,0.0828906645377477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,float16,0,0.07108800113201141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,float16,0,0.0936959981918335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,128,1,fp8,fp8,0,0.06866133213043213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,128,1,float16,fp8,0,0.0726560006539027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,0,1,float16,fp8,0,0.09424533446629842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,64,0,1,fp8,fp8,0,0.08478400111198425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,float16,0,0.07232533395290375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,float16,0,0.09292800227801006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,128,1,float16,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,128,1,fp8,fp8,0,0.06869333485762279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,0,1,float16,fp8,0,0.09398933251698811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,64,0,1,fp8,fp8,0,0.08455999692281087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,float16,0,0.07457066575686137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,float16,0,0.0953439970811208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,128,1,float16,fp8,0,0.07513600091139476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,128,1,fp8,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,0,1,float16,fp8,0,0.09684800108273824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,float16,0,0.050517335534095764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,64,0,1,fp8,fp8,0,0.08824533224105835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,float16,0,0.06506666541099548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,128,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,128,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,0,1,float16,fp8,0,0.06669333577156067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,float16,0,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,64,0,1,fp8,fp8,0,0.060032000144322716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,float16,0,0.06160533428192139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,128,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,128,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,0,1,float16,fp8,0,0.062122667829195656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,64,0,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,float16,0,0.06181333462397257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,128,1,float16,fp8,0,0.04659200211366018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,128,1,fp8,fp8,0,0.0439626673857371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,0,1,float16,fp8,0,0.06237866481145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,64,0,1,fp8,fp8,0,0.05589333176612854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,float16,0,0.04776533444722494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,float16,0,0.06232533355553945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,128,1,float16,fp8,0,0.04797333478927612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,128,1,fp8,fp8,0,0.043706665436426796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,0,1,float16,fp8,0,0.06318933268388112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,64,0,1,fp8,fp8,0,0.05806399881839752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,float16,0,0.0470773329337438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,float16,0,0.06258133550484975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,128,1,float16,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,128,1,fp8,fp8,0,0.04576533536116282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,0,1,float16,fp8,0,0.06410666803518932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,64,0,1,fp8,fp8,0,0.058186665177345276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,float16,0,0.039546666045983635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,float16,0,0.04769066472848257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,128,1,float16,fp8,0,0.03959999978542328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,128,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,0,1,float16,fp8,0,0.04822400212287903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,64,0,1,fp8,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,float16,0,0.04775999983151754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,128,1,float16,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,128,1,fp8,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,0,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,64,0,1,fp8,fp8,0,0.045066664616266884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,float16,0,0.047930667797724404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,128,1,float16,fp8,0,0.038762666285037994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,128,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,0,1,float16,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,64,0,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,float16,0,0.03794133414824804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,float16,0,0.04822400212287903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,128,1,float16,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,128,1,fp8,fp8,0,0.0376800000667572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,0,1,float16,fp8,0,0.047775998711586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,64,0,1,fp8,fp8,0,0.04377600053946177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,float16,0,0.03969600051641464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,float16,0,0.0481279989083608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,128,1,float16,fp8,0,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,128,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,0,1,float16,fp8,0,0.04805333415667216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,64,0,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,float16,0,1.0507893562316895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,128,1,float16,fp8,0,1.050154685974121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,float16,0,1.192570686340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,128,1,fp8,fp8,0,0.9719893137613932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,0,1,fp8,fp8,0,1.0875413417816162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,float16,0,1.057690699895223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,64,0,1,float16,fp8,0,1.1923306783040364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,float16,0,1.2067733605702717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,128,1,fp8,fp8,0,1.021504004796346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,128,1,float16,fp8,0,1.0560213724772136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,0,1,float16,fp8,0,1.2000160217285156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,64,0,1,fp8,fp8,0,1.1227253278096516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,float16,0,1.0635093053181965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,float16,0,1.2136320273081462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,128,1,float16,fp8,0,1.061557372411092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,128,1,fp8,fp8,0,1.0468160311381023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,0,1,float16,fp8,0,1.2075573603312175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,float16,0,1.0871466795603435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,64,0,1,fp8,fp8,0,1.1600533326466878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,128,1,float16,fp8,0,1.0824586550394695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,128,1,fp8,fp8,0,1.0681119759877522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,float16,0,1.2340746720631917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,float16,0,0.5796533425649008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,0,1,fp8,fp8,0,1.1915733019510906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,64,0,1,float16,fp8,0,1.2292586962382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,float16,0,0.665615995724996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,128,1,fp8,fp8,0,0.5654559930165609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,128,1,float16,fp8,0,0.5714026689529419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,0,1,float16,fp8,0,0.6552000045776367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,float16,0,0.534437338511149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,64,0,1,fp8,fp8,0,0.6345653136571249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,float16,0,0.6060800155003866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,128,1,float16,fp8,0,0.5338346560796102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,128,1,fp8,fp8,0,0.492853323618571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,0,1,float16,fp8,0,0.6071413358052572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,64,0,1,fp8,fp8,0,0.5546720027923584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,float16,0,0.5389920075734457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,float16,0,0.6128213405609131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,128,1,float16,fp8,0,0.5382560094197592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,128,1,fp8,fp8,0,0.5080693165461222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,0,1,float16,fp8,0,0.6108373403549194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,64,0,1,fp8,fp8,0,0.5655999978383383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,float16,0,0.5426880121231079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,float16,0,0.6189706722895304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,128,1,float16,fp8,0,0.5418933232625326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,128,1,fp8,fp8,0,0.5128480195999146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,0,1,float16,fp8,0,0.6181653340657552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,64,0,1,fp8,fp8,0,0.5724960168202718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,float16,0,0.5500266551971436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,float16,0,0.626906673113505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,128,1,float16,fp8,0,0.5473653475443522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,128,1,fp8,fp8,0,0.528709332148234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,float16,0,0.30239466826121014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,0,1,fp8,fp8,0,0.5916639963785807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,float16,0,0.3471786578496297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,64,0,1,float16,fp8,0,0.6241066853205363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,128,1,float16,fp8,0,0.2972799936930339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,128,1,fp8,fp8,0,0.29266132911046344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,0,1,float16,fp8,0,0.3415093421936035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,64,0,1,fp8,fp8,0,0.32814933856328327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,float16,0,0.2783520023028056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,float16,0,0.31428800026575726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,128,1,float16,fp8,0,0.2781066695849101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,128,1,fp8,fp8,0,0.2574666738510132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,0,1,float16,fp8,0,0.31538132826487225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,64,0,1,fp8,fp8,0,0.288154661655426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,float16,0,0.2783626715342204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,float16,0,0.3165973424911499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,128,1,float16,fp8,0,0.2779093384742737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,128,1,fp8,fp8,0,0.26320000489552814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,0,1,float16,fp8,0,0.31514666477839154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,64,0,1,fp8,fp8,0,0.2943039933840434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,float16,0,0.28126933177312213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,float16,0,0.3209279974301656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,128,1,float16,fp8,0,0.2805546720822652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,128,1,fp8,fp8,0,0.26686400175094604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,0,1,float16,fp8,0,0.3205706675847371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,64,0,1,fp8,fp8,0,0.29709867636362713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,float16,0,0.2855413357416789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,float16,0,0.3267786701520284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,128,1,float16,fp8,0,0.2850666642189026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,128,1,fp8,fp8,0,0.2728053331375122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,0,1,float16,fp8,0,0.3250453273455302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,64,0,1,fp8,fp8,0,0.30607465902964276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,float16,0,0.16168000300725302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,float16,0,0.1846239964167277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,128,1,float16,fp8,0,0.15870400269826254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,128,1,fp8,fp8,0,0.1574133336544037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,0,1,float16,fp8,0,0.18227734168370566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,64,0,1,fp8,fp8,0,0.17615467309951782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,float16,0,0.1471519966920217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,float16,0,0.1670080025990804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,128,1,float16,fp8,0,0.14597333470980325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,128,1,fp8,fp8,0,0.13878400127092996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,0,1,float16,fp8,0,0.1667626698811849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,64,0,1,fp8,fp8,0,0.15457066893577576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,float16,0,0.14805333813031515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,float16,0,0.16659733653068542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,128,1,float16,fp8,0,0.1464959979057312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,128,1,fp8,fp8,0,0.14014400045077005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,0,1,float16,fp8,0,0.16793066263198853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,64,0,1,fp8,fp8,0,0.15668800473213196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,float16,0,0.14841600259145102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,float16,0,0.17031466960906982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,128,1,float16,fp8,0,0.14922666549682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,128,1,fp8,fp8,0,0.14203733205795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,0,1,float16,fp8,0,0.16883732875188193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,64,0,1,fp8,fp8,0,0.1607093314329783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,float16,0,0.15191466609636942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,float16,0,0.1725333333015442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,128,1,float16,fp8,0,0.15077333648999533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,128,1,fp8,fp8,0,0.1464853286743164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,0,1,float16,fp8,0,0.17126933733622232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,float16,0,0.09070400396982829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,64,0,1,fp8,fp8,0,0.16375999649365744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,float16,0,0.10281067093213399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,128,1,float16,fp8,0,0.08905599514643352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,128,1,fp8,fp8,0,0.09091732899347942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,0,1,float16,fp8,0,0.10357333223025005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,64,0,1,fp8,fp8,0,0.09935466448465984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,float16,0,0.08265066643555959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,float16,0,0.09491200248400371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,128,1,float16,fp8,0,0.08301866551240285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,128,1,fp8,fp8,0,0.07562133173147838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,0,1,float16,fp8,0,0.09467732906341553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,64,0,1,fp8,fp8,0,0.08535466591517131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,float16,0,0.08182933429876964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,float16,0,0.09379733602205913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,128,1,float16,fp8,0,0.0828000009059906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,128,1,fp8,fp8,0,0.07707733412583669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,0,1,float16,fp8,0,0.09390933314959209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,64,0,1,fp8,fp8,0,0.08688533306121826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,float16,0,0.08294933537642162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,float16,0,0.09371200203895569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,128,1,float16,fp8,0,0.0830026666323344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,128,1,fp8,fp8,0,0.07762133578459422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,0,1,float16,fp8,0,0.09333333373069763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,64,0,1,fp8,fp8,0,0.08698667089144389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,float16,0,0.08358400066693623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,float16,0,0.0953546663125356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,128,1,float16,fp8,0,0.0844533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,128,1,fp8,fp8,0,0.0806879997253418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,0,1,float16,fp8,0,0.09500267108281453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,64,0,1,fp8,fp8,0,0.09058666229248047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,float16,0,0.05188799897829691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,float16,0,0.059205333391825356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,128,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,128,1,fp8,fp8,0,0.05091199775536855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,0,1,float16,fp8,0,0.05877333382765452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,64,0,1,fp8,fp8,0,0.05589866638183594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,float16,0,0.05043200155099233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,float16,0,0.05845333139101664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,128,1,float16,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,128,1,fp8,fp8,0,0.047557334105173744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,0,1,float16,fp8,0,0.05807999769846598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,64,0,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,float16,0,0.049738665421803795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,float16,0,0.058864002426465355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,128,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,128,1,fp8,fp8,0,0.047024001677831016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,0,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,64,0,1,fp8,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,float16,0,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,float16,0,0.05659199754397074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,128,1,float16,fp8,0,0.05114666620890299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,128,1,fp8,fp8,0,0.047370667258898415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,0,1,float16,fp8,0,0.05774400134881338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,64,0,1,fp8,fp8,0,0.053946668903032936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,float16,0,0.05008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,float16,0,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,128,1,float16,fp8,0,0.05109333495299021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,128,1,fp8,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,0,1,float16,fp8,0,0.05899199843406677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,64,0,1,fp8,fp8,0,0.0536053329706192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,float16,0,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,128,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,128,1,fp8,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,0,1,float16,fp8,0,0.040074666341145836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,64,0,1,fp8,fp8,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,float16,0,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,128,1,float16,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,128,1,fp8,fp8,0,0.03359466542800268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,0,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,64,0,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,float16,0,0.035930665830771126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,128,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,128,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,0,1,float16,fp8,0,0.03957333415746689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,64,0,1,fp8,fp8,0,0.03573866685231527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,float16,0,0.04012266546487808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,128,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,128,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,0,1,float16,fp8,0,0.03995199998219808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,64,0,1,fp8,fp8,0,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,float16,0,0.03482666611671448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,float16,0,0.04057066639264425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,128,1,float16,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,128,1,fp8,fp8,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,64,0,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,float16,0,0.026933332284291584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,128,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,128,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,0,1,float16,float16,0,0.031888000667095184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,64,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,128,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,128,1,fp8,fp8,0,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,64,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,float16,0,0.02595199892918269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,float16,0,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,128,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,128,1,fp8,fp8,0,0.024362665911515553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,64,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,float16,0,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,128,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,0,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,128,1,fp8,fp8,0,0.0249439999461174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,64,0,1,fp8,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,128,1,float16,fp8,0,0.027119999130566914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,128,1,fp8,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,64,0,1,fp8,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,128,1,float16,float16,0,1.0160373051961262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,0,1,float16,float16,0,1.0187946955362956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,128,1,float16,fp8,0,1.021509329477946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,128,1,fp8,fp8,0,0.9445546468098959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,0,1,float16,fp8,0,1.0184266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,64,0,1,fp8,fp8,0,0.9373439947764078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,128,1,float16,float16,0,1.029088020324707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,0,1,float16,float16,0,1.0332213242848713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,128,1,float16,fp8,0,1.0295146306355794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,128,1,fp8,fp8,0,0.990511973698934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,0,1,float16,fp8,0,1.0274293422698975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,64,0,1,fp8,fp8,0,0.9886720180511475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,128,1,float16,float16,0,1.0375146865844727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,0,1,float16,float16,0,1.0405653317769368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,128,1,float16,fp8,0,1.0355573495229085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,128,1,fp8,fp8,0,1.0190719763437908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,0,1,float16,fp8,0,1.0352426369984944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,64,0,1,fp8,fp8,0,1.0175093015034993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,128,1,float16,float16,0,1.0572266578674316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,0,1,float16,float16,0,1.0715946356455486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,128,1,fp8,fp8,0,1.043013334274292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,128,1,float16,fp8,0,1.0589866638183594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,0,1,float16,fp8,0,1.0683200359344482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,64,0,1,fp8,fp8,0,1.0438613096872966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,0,1,float16,float16,0,0.5734133323033651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,128,1,float16,float16,0,0.5664159854253134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,128,1,float16,fp8,0,0.5563253164291382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,128,1,fp8,fp8,0,0.553546667098999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,0,1,float16,fp8,0,0.5625653266906738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,64,0,1,fp8,fp8,0,0.5550506512324015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,128,1,float16,float16,0,0.5175733168919882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,0,1,float16,float16,0,0.5196586847305298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,128,1,float16,fp8,0,0.5177973508834839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,128,1,fp8,fp8,0,0.481386661529541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,0,1,float16,fp8,0,0.5191893180211385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,64,0,1,fp8,fp8,0,0.47895999749501544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,128,1,float16,float16,0,0.5244586865107218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,128,1,float16,fp8,0,0.523141344388326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,0,1,float16,float16,0,0.5249280134836832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,128,1,fp8,fp8,0,0.4946773449579875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,0,1,float16,fp8,0,0.5224266846974691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,64,0,1,fp8,fp8,0,0.4894719918568929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,128,1,float16,float16,0,0.528986652692159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,0,1,float16,float16,0,0.5307466586430868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,128,1,fp8,fp8,0,0.49961598714192706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,128,1,float16,fp8,0,0.5273760159810384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,0,1,float16,fp8,0,0.528767983118693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,64,0,1,fp8,fp8,0,0.4978666702906291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,128,1,float16,float16,0,0.5363786617914835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,0,1,float16,float16,0,0.5366613467534384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,128,1,float16,fp8,0,0.5335626602172852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,128,1,float16,float16,0,0.29691733916600543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,128,1,fp8,fp8,0,0.517136017481486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,0,1,fp8,fp8,0,0.5160693327585856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,64,0,1,float16,fp8,0,0.5370453198750814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,0,1,float16,float16,0,0.2996906638145447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,128,1,float16,fp8,0,0.2897813320159912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,128,1,fp8,fp8,0,0.2865440050760905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,0,1,float16,fp8,0,0.293887992699941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,64,0,1,fp8,fp8,0,0.28829866647720337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,128,1,float16,float16,0,0.2699306607246399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,0,1,float16,float16,0,0.2699573238690694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,128,1,float16,fp8,0,0.26951466004053753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,128,1,fp8,fp8,0,0.2507893244425456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,0,1,float16,fp8,0,0.269381324450175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,64,0,1,fp8,fp8,0,0.2490560015042623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,128,1,float16,float16,0,0.2701920072237651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,0,1,float16,float16,0,0.27131734291712445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,128,1,float16,fp8,0,0.2702346642812093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,128,1,fp8,fp8,0,0.2567946712176005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,0,1,float16,fp8,0,0.27002133925755817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,64,0,1,fp8,fp8,0,0.25545066595077515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,128,1,float16,float16,0,0.2741120060284932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,0,1,float16,float16,0,0.27431466182072956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,128,1,float16,fp8,0,0.27293866872787476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,128,1,fp8,fp8,0,0.2600586613019307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,0,1,float16,fp8,0,0.27569599946339923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,64,0,1,fp8,fp8,0,0.2572266658147176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,128,1,float16,float16,0,0.27948800722757977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,0,1,float16,float16,0,0.2805386583010356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,128,1,float16,fp8,0,0.27799467245737713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,128,1,fp8,fp8,0,0.2685333291689555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,0,1,float16,fp8,0,0.2797013322512309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,128,1,float16,float16,0,0.15602133671442667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,64,0,1,fp8,fp8,0,0.26682132482528687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,0,1,float16,float16,0,0.15895467003186545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,128,1,float16,fp8,0,0.15475199619928995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,128,1,fp8,fp8,0,0.15479999780654907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,0,1,float16,fp8,0,0.15691199898719788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,64,0,1,fp8,fp8,0,0.15457600355148315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,128,1,float16,float16,0,0.14426666498184204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,0,1,float16,float16,0,0.14272532860438028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,128,1,float16,fp8,0,0.14341333508491516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,128,1,fp8,fp8,0,0.1353333294391632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,0,1,float16,fp8,0,0.14273599783579508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,64,0,1,fp8,fp8,0,0.13402133186658224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,128,1,float16,float16,0,0.14456533392270407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,0,1,float16,float16,0,0.14299733440081278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,128,1,float16,fp8,0,0.14406399925549826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,128,1,fp8,fp8,0,0.13707199692726135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,0,1,float16,fp8,0,0.14311466614405313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,64,0,1,fp8,fp8,0,0.135535995165507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,128,1,float16,float16,0,0.14452800154685974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,0,1,float16,float16,0,0.1463573376337687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,128,1,float16,fp8,0,0.14573867122332254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,128,1,fp8,fp8,0,0.13969600200653076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,0,1,float16,fp8,0,0.1460586686929067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,64,0,1,fp8,fp8,0,0.1381653348604838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,128,1,float16,float16,0,0.1472053329149882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,128,1,float16,fp8,0,0.14708800117174783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,0,1,float16,float16,0,0.14814399679501852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,128,1,fp8,fp8,0,0.14450666308403015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,128,1,float16,float16,0,0.0888426701227824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,0,1,float16,fp8,0,0.14848533272743225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,64,0,1,fp8,fp8,0,0.14243732889493307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,0,1,float16,float16,0,0.08947199583053589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,128,1,float16,fp8,0,0.08712533116340637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,128,1,fp8,fp8,0,0.08924266695976257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,0,1,float16,fp8,0,0.0890773336092631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,128,1,float16,float16,0,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,64,0,1,fp8,fp8,0,0.08799999952316284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,128,1,float16,fp8,0,0.08055466910203297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,0,1,float16,float16,0,0.07999466856320699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,128,1,fp8,fp8,0,0.07503999769687653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,0,1,float16,fp8,0,0.08062933385372162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,64,0,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,128,1,float16,float16,0,0.08083733419577281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,0,1,float16,float16,0,0.0804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,128,1,float16,fp8,0,0.08108800152937572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,128,1,fp8,fp8,0,0.0763679991165797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,0,1,float16,fp8,0,0.07996800045172374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,64,0,1,fp8,fp8,0,0.07453333338101704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,128,1,float16,float16,0,0.0817440003156662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,0,1,float16,float16,0,0.08070933322111766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,128,1,float16,fp8,0,0.08118933439254761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,128,1,fp8,fp8,0,0.07620266576608022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,0,1,float16,fp8,0,0.08012799918651581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,64,0,1,fp8,fp8,0,0.0746613343556722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,128,1,float16,float16,0,0.08123200138409932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,0,1,float16,float16,0,0.08258133133252461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,128,1,float16,fp8,0,0.08155199885368347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,128,1,fp8,fp8,0,0.07924266656239827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,0,1,float16,fp8,0,0.08169066905975342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,64,0,1,fp8,fp8,0,0.07751466830571492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,128,1,float16,float16,0,0.05093866586685181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,0,1,float16,float16,0,0.05189333359400431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,128,1,float16,fp8,0,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,128,1,fp8,fp8,0,0.051167999704678856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,0,1,float16,fp8,0,0.05169600248336792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,64,0,1,fp8,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,0,1,float16,float16,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,128,1,float16,float16,0,0.05077333251635233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,128,1,float16,fp8,0,0.04881600042184194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,128,1,fp8,fp8,0,0.04673066735267639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,0,1,float16,fp8,0,0.049466664592425026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,128,1,float16,float16,0,0.048245335618654885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,0,1,float16,float16,0,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,64,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,128,1,float16,fp8,0,0.049253334601720176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,128,1,fp8,fp8,0,0.046575998266537987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,0,1,float16,fp8,0,0.04940799872080485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,64,0,1,fp8,fp8,0,0.04664533336957296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,128,1,float16,float16,0,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,0,1,float16,float16,0,0.04972266654173533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,128,1,float16,fp8,0,0.04934933284918467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,128,1,fp8,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,0,1,float16,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,64,0,1,fp8,fp8,0,0.04567466676235199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,128,1,float16,float16,0,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,128,1,float16,fp8,0,0.049738665421803795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,128,1,fp8,fp8,0,0.04640000065167745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,0,1,float16,fp8,0,0.05004799862702688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,64,0,1,fp8,fp8,0,0.04628799855709076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,128,1,float16,float16,0,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,0,1,float16,float16,0,0.033359999457995095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,128,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,128,1,fp8,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,0,1,float16,fp8,0,0.03380800038576126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,64,0,1,fp8,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,128,1,float16,float16,0,0.03469866762558619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,0,1,float16,float16,0,0.03387200087308884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,128,1,float16,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,128,1,fp8,fp8,0,0.032058666149775185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,64,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,128,1,float16,float16,0,0.03457599878311157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,0,1,float16,float16,0,0.03348266581694285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,128,1,float16,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,128,1,fp8,fp8,0,0.032885332902272545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,0,1,float16,fp8,0,0.03389333436886469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,64,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,128,1,float16,float16,0,0.03568533311287562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,0,1,float16,float16,0,0.03519999980926514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,128,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,128,1,fp8,fp8,0,0.03311466674009959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,0,1,float16,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,64,0,1,fp8,fp8,0,0.03376533339420954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,128,1,float16,float16,0,0.03403199960788091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,128,1,float16,fp8,0,0.035301332672437034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,128,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,0,1,float16,fp8,0,0.03411199897527695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,128,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,128,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,64,0,1,fp8,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,128,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,128,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,0,1,float16,fp8,0,0.025093334416548412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,128,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,128,1,float16,float16,0,0.02510933329661687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,128,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,0,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,128,1,float16,float16,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,64,0,1,fp8,fp8,0,0.02426133304834366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,0,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,0,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,128,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,64,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,128,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,0,1,float16,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,64,0,1,fp8,fp8,0,0.025957333544890087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,128,1,float16,float16,0,0.022229333718617756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,128,1,float16,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,0,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,128,1,fp8,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,0,1,float16,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,64,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,128,1,float16,float16,0,0.020917333662509918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,128,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,64,0,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,128,1,float16,float16,0,0.021738665799299877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,0,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,128,1,fp8,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,64,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,128,1,float16,float16,0,0.021701333423455555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,128,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,128,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,64,0,1,fp8,fp8,0,0.020096000283956528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,128,1,float16,float16,0,0.4792106548945109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,0,1,float16,float16,0,0.4708106517791748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,128,1,float16,fp8,0,0.48057599862416583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,128,1,fp8,fp8,0,0.4413280089696248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,0,1,fp8,fp8,0,0.425983985265096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,64,0,1,float16,fp8,0,0.4678453207015991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,128,1,float16,float16,0,0.4862080017725627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,128,1,fp8,fp8,0,0.45575467745463055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,0,1,float16,float16,0,0.4771253267923991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,128,1,float16,fp8,0,0.48389331499735516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,0,1,float16,fp8,0,0.4742666482925415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,64,0,1,fp8,fp8,0,0.43909335136413574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,128,1,float16,float16,0,0.49111465613047284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,0,1,float16,float16,0,0.47891732056935626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,128,1,float16,fp8,0,0.488597313563029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,128,1,fp8,fp8,0,0.4595786730448405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,0,1,float16,fp8,0,0.47683199246724445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,64,0,1,fp8,fp8,0,0.4451520045598348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,128,1,float16,float16,0,0.5005973180135092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,0,1,float16,float16,0,0.48719998200734455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,128,1,float16,fp8,0,0.4941600163777669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,128,1,fp8,fp8,0,0.48686933517456055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,0,1,float16,fp8,0,0.48316800594329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,64,0,1,fp8,fp8,0,0.4732746680577596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,128,1,float16,float16,0,0.27746667464574176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,0,1,float16,float16,0,0.27110934257507324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,128,1,float16,fp8,0,0.27146132787068683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,128,1,fp8,fp8,0,0.26818666855494183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,0,1,float16,fp8,0,0.2651093403498332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,64,0,1,fp8,fp8,0,0.2594826618830363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,128,1,float16,float16,0,0.26130133867263794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,0,1,float16,float16,0,0.24543466170628866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,128,1,float16,fp8,0,0.25124265750249225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,128,1,fp8,fp8,0,0.23138666152954102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,0,1,float16,fp8,0,0.24518932898839316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,64,0,1,fp8,fp8,0,0.22214933236440024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,0,1,float16,float16,0,0.2461120088895162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,128,1,float16,float16,0,0.2524533271789551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,128,1,float16,fp8,0,0.25093867381413776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,128,1,fp8,fp8,0,0.23742934068044028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,0,1,float16,fp8,0,0.24551467100779215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,64,0,1,fp8,fp8,0,0.23071465889612833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,128,1,float16,float16,0,0.25248533487319946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,0,1,float16,float16,0,0.24842133124669394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,128,1,float16,fp8,0,0.2530933419863383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,128,1,fp8,fp8,0,0.23858133951822916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,0,1,float16,fp8,0,0.24807999531428018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,64,0,1,fp8,fp8,0,0.2329919934272766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,128,1,float16,float16,0,0.25963733593622845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,0,1,float16,float16,0,0.2542240023612976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,128,1,float16,fp8,0,0.25786133607228595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,128,1,fp8,fp8,0,0.24787733952204385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,0,1,float16,fp8,0,0.2521386742591858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,0,1,float16,float16,0,0.14469866951306662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,128,1,float16,float16,0,0.1474399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,64,0,1,fp8,fp8,0,0.23965867360432944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,128,1,float16,fp8,0,0.1467413306236267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,128,1,fp8,fp8,0,0.14492266376813254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,0,1,fp8,fp8,0,0.14073066910107931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,64,0,1,float16,fp8,0,0.14366400241851807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,128,1,float16,float16,0,0.13481600085894266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,0,1,float16,float16,0,0.13235200444857279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,128,1,float16,fp8,0,0.13410133123397827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,128,1,fp8,fp8,0,0.12619200348854065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,0,1,fp8,fp8,0,0.11953066786130269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,64,0,1,float16,fp8,0,0.13181333740552267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,128,1,float16,float16,0,0.13638933499654135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,0,1,float16,float16,0,0.13246400157610574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,128,1,float16,fp8,0,0.13428266843159994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,128,1,fp8,fp8,0,0.12727999687194824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,0,1,float16,fp8,0,0.1322826643784841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,64,0,1,fp8,fp8,0,0.1244053343931834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,128,1,float16,float16,0,0.13609066605567932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,0,1,float16,float16,0,0.13292800386746725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,128,1,float16,fp8,0,0.13658133149147034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,0,1,float16,fp8,0,0.13332800070444742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,128,1,fp8,fp8,0,0.13014933466911316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,64,0,1,fp8,fp8,0,0.1260479986667633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,128,1,float16,float16,0,0.13854933778444925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,0,1,float16,float16,0,0.13488533099492392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,128,1,float16,fp8,0,0.13859732945760092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,128,1,fp8,fp8,0,0.1346453328927358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,128,1,float16,float16,0,0.0865066647529602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,0,1,float16,fp8,0,0.136245330174764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,64,0,1,fp8,fp8,0,0.1300159990787506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,0,1,float16,float16,0,0.08290666838486989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,128,1,float16,fp8,0,0.08386666576067607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,0,1,float16,fp8,0,0.08261333405971527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,128,1,fp8,fp8,0,0.08567466338475545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,128,1,float16,float16,0,0.07685866455237071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,64,0,1,fp8,fp8,0,0.08281599978605907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,128,1,float16,fp8,0,0.07746133208274841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,0,1,float16,float16,0,0.07629333436489105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,128,1,fp8,fp8,0,0.07043733199437459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,0,1,float16,fp8,0,0.07471466561158498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,64,0,1,fp8,fp8,0,0.06899733344713847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,128,1,float16,float16,0,0.07824533184369405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,0,1,float16,float16,0,0.07514666517575581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,128,1,float16,fp8,0,0.07726400097211202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,128,1,fp8,fp8,0,0.07225599884986877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,0,1,float16,fp8,0,0.07472000022729237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,64,0,1,fp8,fp8,0,0.06859200199445088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,128,1,float16,float16,0,0.07721066474914551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,128,1,float16,fp8,0,0.07776533563931783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,0,1,float16,float16,0,0.075162669022878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,128,1,fp8,fp8,0,0.07061333457628886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,0,1,float16,fp8,0,0.07484800120194753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,64,0,1,fp8,fp8,0,0.06867200136184692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,128,1,float16,float16,0,0.07878933350245158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,0,1,float16,float16,0,0.07622933387756348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,128,1,float16,fp8,0,0.07869333525498708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,128,1,fp8,fp8,0,0.0746559997399648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,0,1,fp8,fp8,0,0.07099733253320058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,64,0,1,float16,fp8,0,0.07564799984296162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,128,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,128,1,float16,float16,0,0.048954665660858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,128,1,fp8,fp8,0,0.04756266872088114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,0,1,float16,fp8,0,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,64,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,128,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,0,1,float16,float16,0,0.045237332582473755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,128,1,float16,fp8,0,0.04674666623274485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,128,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,0,1,fp8,fp8,0,0.04165333261092504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,64,0,1,float16,fp8,0,0.045408000548680626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,128,1,float16,float16,0,0.04952533543109894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,0,1,float16,float16,0,0.046112000942230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,128,1,float16,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,128,1,fp8,fp8,0,0.044122666120529175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,0,1,float16,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,64,0,1,fp8,fp8,0,0.04297066728274027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,128,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,0,1,float16,float16,0,0.044735997915267944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,128,1,float16,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,128,1,fp8,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,0,1,float16,fp8,0,0.045221333702405296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,64,0,1,fp8,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,0,1,float16,float16,0,0.04600533346335093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,128,1,float16,fp8,0,0.04718933502833048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,128,1,float16,float16,0,0.04682666560014089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,0,1,float16,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,128,1,fp8,fp8,0,0.044218664367993675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,64,0,1,fp8,fp8,0,0.04284266630808512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,128,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,128,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,0,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,128,1,fp8,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,64,0,1,fp8,fp8,0,0.03070933371782303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,0,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,128,1,float16,float16,0,0.033344000577926636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,128,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,128,1,fp8,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,0,1,float16,fp8,0,0.031871999303499855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,64,0,1,fp8,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,128,1,float16,float16,0,0.03365866591533025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,0,1,float16,float16,0,0.03196800003449122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,128,1,fp8,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,0,1,float16,fp8,0,0.03284800052642822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,128,1,float16,float16,0,0.03385599950949351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,0,1,float16,float16,0,0.032032000521818794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,64,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,128,1,float16,fp8,0,0.0337119996547699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,128,1,fp8,fp8,0,0.03175999969244003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,0,1,float16,fp8,0,0.03160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,128,1,float16,float16,0,0.033733333150545754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,64,0,1,fp8,fp8,0,0.029824001093705494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,0,1,float16,float16,0,0.03182400017976761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,128,1,float16,fp8,0,0.03401600072781245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,128,1,fp8,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,0,1,float16,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,64,0,1,fp8,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,128,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,128,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,0,1,float16,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,64,0,1,fp8,fp8,0,0.023786666492621105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,128,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,0,1,fp8,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,128,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,0,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,128,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,0,1,float16,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,64,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,128,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,0,1,float16,float16,0,0.023743999501069386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,64,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,128,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,128,1,fp8,fp8,0,0.023754666248957317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,64,0,1,fp8,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,128,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,128,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,0,1,float16,float16,0,0.021856000026067097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,0,1,float16,fp8,0,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,0,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,128,1,float16,float16,0,0.022410665949185688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,0,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,64,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,128,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,0,1,float16,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,64,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,128,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,0,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,128,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,64,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,128,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,128,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,128,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,64,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,0,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,128,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,128,1,fp8,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,64,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,128,1,float16,float16,0,0.019941333681344986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,64,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,128,1,float16,float16,0,0.019920000185569126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,128,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,64,0,1,fp8,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,128,1,float16,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,0,1,float16,float16,0,0.26100265979766846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,128,1,float16,float16,0,0.2620533307393392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,128,1,fp8,fp8,0,0.24202666680018106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,0,1,float16,fp8,0,0.2595146695772807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,64,0,1,fp8,fp8,0,0.2437386711438497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,128,1,float16,float16,0,0.26078399022420246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,0,1,float16,float16,0,0.2609600027402242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,128,1,float16,fp8,0,0.2613973418871562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,128,1,fp8,fp8,0,0.24892799059549967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,0,1,float16,fp8,0,0.25915199518203735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,64,0,1,fp8,fp8,0,0.2508959968884786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,128,1,float16,float16,0,0.26309865713119507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,0,1,float16,float16,0,0.2636959950129191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,128,1,float16,fp8,0,0.2640213370323181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,128,1,fp8,fp8,0,0.25194666783014935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,0,1,float16,fp8,0,0.26173333326975506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,64,0,1,fp8,fp8,0,0.25468266010284424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,0,1,float16,float16,0,0.2656586567560832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,128,1,float16,float16,0,0.2662880023320516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,128,1,float16,fp8,0,0.2656746705373128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,128,1,float16,float16,0,0.15181333820025125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,128,1,fp8,fp8,0,0.2638453245162964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,0,1,float16,fp8,0,0.2644266684850057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,64,0,1,fp8,fp8,0,0.26202134291330975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,0,1,float16,float16,0,0.15157866477966309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,128,1,fp8,fp8,0,0.15052266915639242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,0,1,float16,fp8,0,0.14788800477981567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,128,1,float16,fp8,0,0.15033599734306335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,64,0,1,fp8,fp8,0,0.151418666044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,128,1,float16,float16,0,0.13876799742380777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,0,1,float16,float16,0,0.13801599542299905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,128,1,float16,fp8,0,0.13903466860453287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,128,1,fp8,fp8,0,0.130213330189387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,0,1,float16,fp8,0,0.13938132921854654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,64,0,1,fp8,fp8,0,0.12993066509564719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,128,1,float16,float16,0,0.1389173368612925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,128,1,float16,fp8,0,0.1386186679204305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,128,1,fp8,fp8,0,0.13246400157610574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,0,1,float16,float16,0,0.13993600010871887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,0,1,fp8,fp8,0,0.1322719951470693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,128,1,float16,float16,0,0.1402453382809957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,0,1,float16,float16,0,0.14075733224550882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,64,0,1,float16,fp8,0,0.1386560002962748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,128,1,float16,fp8,0,0.13913066188494363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,128,1,fp8,fp8,0,0.13555733362833658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,0,1,float16,fp8,0,0.14070933063824972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,64,0,1,fp8,fp8,0,0.1365120013554891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,128,1,float16,float16,0,0.1426346699396769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,0,1,float16,float16,0,0.1422879993915558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,128,1,float16,fp8,0,0.14246400197347006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,128,1,fp8,fp8,0,0.1404906709988912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,0,1,float16,fp8,0,0.14152000347773233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,64,0,1,fp8,fp8,0,0.13959999879201254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,128,1,float16,float16,0,0.08516800403594971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,128,1,float16,fp8,0,0.08295466502507527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,0,1,float16,float16,0,0.08588799834251404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,128,1,fp8,fp8,0,0.0867733359336853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,0,1,float16,fp8,0,0.08372799555460612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,64,0,1,fp8,fp8,0,0.0853653351465861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,128,1,float16,float16,0,0.07794133325417836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,0,1,float16,float16,0,0.07734933495521545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,128,1,fp8,fp8,0,0.07248533268769582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,128,1,float16,fp8,0,0.07724266747633617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,0,1,float16,fp8,0,0.07733866572380066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,64,0,1,fp8,fp8,0,0.07293333113193512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,128,1,float16,float16,0,0.07900266846021016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,0,1,float16,float16,0,0.07770133515199025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,128,1,float16,fp8,0,0.07886399825414021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,0,1,float16,fp8,0,0.07875733574231465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,128,1,fp8,fp8,0,0.0734506646792094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,64,0,1,fp8,fp8,0,0.07391466697057088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,128,1,float16,float16,0,0.07850666840871175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,0,1,float16,float16,0,0.07854933540026347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,128,1,float16,fp8,0,0.07796800136566162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,128,1,fp8,fp8,0,0.07434666653474171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,0,1,float16,fp8,0,0.07804800073305766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,64,0,1,fp8,fp8,0,0.07259733478228252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,128,1,float16,float16,0,0.0791786660750707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,0,1,float16,float16,0,0.07892266909281413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,128,1,fp8,fp8,0,0.0761599987745285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,128,1,float16,fp8,0,0.07896533111731212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,0,1,float16,fp8,0,0.07886933286984761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,64,0,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,128,1,float16,float16,0,0.050144001841545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,0,1,float16,float16,0,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,128,1,fp8,fp8,0,0.047423998514811196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,128,1,float16,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,0,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,64,0,1,fp8,fp8,0,0.048997332652409874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,128,1,float16,float16,0,0.04808000226815542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,128,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,0,1,float16,float16,0,0.04706133405367533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,128,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,0,1,float16,fp8,0,0.04734399914741516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,64,0,1,fp8,fp8,0,0.04447466631730398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,128,1,float16,float16,0,0.04640533526738485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,0,1,float16,float16,0,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,128,1,fp8,fp8,0,0.04452266792456309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,128,1,float16,fp8,0,0.04783466458320618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,64,0,1,fp8,fp8,0,0.04493333399295807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,128,1,float16,float16,0,0.04766400158405304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,0,1,float16,float16,0,0.04796266555786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,128,1,float16,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,128,1,fp8,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,0,1,float16,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,64,0,1,fp8,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,128,1,float16,float16,0,0.04786133269468943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,0,1,float16,float16,0,0.047770669062932335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,128,1,float16,fp8,0,0.04849599798520406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,0,1,float16,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,128,1,fp8,fp8,0,0.045456002155939736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,128,1,float16,float16,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,64,0,1,fp8,fp8,0,0.04608533283074697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,128,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,128,1,fp8,fp8,0,0.031248000760873158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,0,1,float16,fp8,0,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,64,0,1,fp8,fp8,0,0.030671998858451843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,128,1,float16,float16,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,0,1,float16,float16,0,0.02978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,128,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,128,1,fp8,fp8,0,0.030234667162100475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,0,1,float16,fp8,0,0.031717332700888314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,64,0,1,fp8,fp8,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,128,1,float16,float16,0,0.03181866556406021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,0,1,float16,float16,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,128,1,float16,fp8,0,0.03183999905983607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,128,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,0,1,fp8,fp8,0,0.029834667841593426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,64,0,1,float16,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,0,1,float16,float16,0,0.03084266682465871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,128,1,fp8,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,0,1,float16,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,64,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,0,1,float16,float16,0,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,128,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,128,1,fp8,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,64,0,1,fp8,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,128,1,float16,float16,0,0.02595199892918269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,0,1,float16,float16,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,128,1,float16,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,128,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,0,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,64,0,1,float16,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,128,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,128,1,fp8,fp8,0,0.023813332120577495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,0,1,fp8,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,0,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,128,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,0,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,64,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,128,1,float16,float16,0,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,128,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,128,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,0,1,float16,fp8,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,128,1,float16,float16,0,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,128,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,128,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,64,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,128,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,128,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,0,1,float16,float16,0,0.017935999979575474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,64,0,1,fp8,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,128,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,128,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,64,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,128,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,128,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,128,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,64,0,1,fp8,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,64,0,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,128,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,64,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,128,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,0,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,128,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,64,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,128,1,float16,float16,0,0.0163680004576842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,128,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,128,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,128,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,128,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,128,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,0,1,fp8,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,64,0,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,64,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,128,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,64,0,1,float16,fp8,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,128,1,float16,fp8,0,0.0930560032526652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,128,1,float16,float16,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,128,1,fp8,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,128,1,float16,float16,0,0.17923200130462646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,128,1,float16,fp8,0,0.17895466089248657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,0,1,float16,float16,0,0.18041600783665976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,128,1,fp8,fp8,0,0.16499200463294983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,0,1,float16,fp8,0,0.17890133460362753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,64,0,1,fp8,fp8,0,0.1679840087890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,128,1,float16,float16,0,0.18085867166519165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,0,1,float16,float16,0,0.17895466089248657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,128,1,float16,fp8,0,0.17965867122014365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,128,1,fp8,fp8,0,0.16942399740219116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,0,1,float16,fp8,0,0.1792693336804708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,64,0,1,fp8,fp8,0,0.17051732540130615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,128,1,float16,float16,0,0.18154666821161905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,0,1,float16,float16,0,0.18138132492701212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,128,1,float16,fp8,0,0.18153067429860434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,128,1,fp8,fp8,0,0.17290133237838745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,0,1,float16,fp8,0,0.18046400944391885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,64,0,1,fp8,fp8,0,0.17139200369517008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,128,1,float16,float16,0,0.18236267566680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,0,1,float16,float16,0,0.183514674504598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,128,1,float16,fp8,0,0.18263467152913412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,128,1,fp8,fp8,0,0.17645333210627237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,0,1,float16,fp8,0,0.18173333009084067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,0,1,float16,float16,0,0.10402666529019673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,128,1,float16,float16,0,0.10500799616177876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,64,0,1,fp8,fp8,0,0.1779093345006307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,128,1,float16,fp8,0,0.10318400462468465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,128,1,fp8,fp8,0,0.10342933734258015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,0,1,float16,fp8,0,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,64,0,1,fp8,fp8,0,0.10377066334088643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,128,1,float16,float16,0,0.09850666920344035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,0,1,float16,float16,0,0.09919466574986775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,128,1,float16,fp8,0,0.09802666306495667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,128,1,fp8,fp8,0,0.09108799695968628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,0,1,float16,fp8,0,0.09894399841626485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,128,1,float16,float16,0,0.09784000118573506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,64,0,1,fp8,fp8,0,0.09128533800443013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,0,1,float16,float16,0,0.09754666686058044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,128,1,float16,fp8,0,0.09711999694506328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,128,1,fp8,fp8,0,0.08938666184743245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,0,1,float16,fp8,0,0.09694400429725647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,64,0,1,fp8,fp8,0,0.09112000465393066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,128,1,float16,float16,0,0.0976639986038208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,0,1,float16,float16,0,0.09749866525332133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,128,1,float16,fp8,0,0.09764267007509868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,128,1,fp8,fp8,0,0.09258133172988892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,0,1,float16,fp8,0,0.09737599889437358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,64,0,1,fp8,fp8,0,0.09091200431187947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,128,1,float16,float16,0,0.09954133629798889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,0,1,float16,float16,0,0.10083199540774028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,128,1,float16,fp8,0,0.09893866380055745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,128,1,fp8,fp8,0,0.0950986643632253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,0,1,float16,fp8,0,0.09922132889429729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,128,1,float16,float16,0,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,64,0,1,fp8,fp8,0,0.09493333101272583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,128,1,float16,fp8,0,0.058506667613983154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,0,1,float16,float16,0,0.05840533475081126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,128,1,fp8,fp8,0,0.05709333221117655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,0,1,float16,fp8,0,0.05862399935722351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,64,0,1,fp8,fp8,0,0.05677866439024607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,128,1,float16,float16,0,0.056362668673197426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,0,1,float16,float16,0,0.05783466498057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,128,1,float16,fp8,0,0.05643199880917867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,128,1,fp8,fp8,0,0.05292266607284546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,0,1,float16,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,64,0,1,fp8,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,128,1,float16,float16,0,0.05646933118502299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,0,1,float16,float16,0,0.05683733522891998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,128,1,float16,fp8,0,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,128,1,fp8,fp8,0,0.05412800113360087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,0,1,float16,fp8,0,0.056202664971351624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,64,0,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,128,1,float16,float16,0,0.055914665261904396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,128,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,128,1,fp8,fp8,0,0.05386666456858317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,0,1,float16,fp8,0,0.05630399783452352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,64,0,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,128,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,0,1,float16,float16,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,128,1,float16,fp8,0,0.057333335280418396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,128,1,fp8,fp8,0,0.05515199899673462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,128,1,float16,float16,0,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,0,1,fp8,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,64,0,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,0,1,float16,float16,0,0.03619199991226196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,128,1,float16,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,128,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,0,1,float16,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,64,0,1,fp8,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,0,1,float16,float16,0,0.035973332822322845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,128,1,float16,float16,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,128,1,float16,fp8,0,0.03598399957021078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,128,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,0,1,float16,fp8,0,0.03545066714286804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,64,0,1,fp8,fp8,0,0.03390933324893316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,128,1,float16,float16,0,0.037130666275819145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,0,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,128,1,float16,fp8,0,0.03596800069014231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,128,1,fp8,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,0,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,64,0,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,128,1,float16,float16,0,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,128,1,float16,fp8,0,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,128,1,fp8,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,0,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,64,0,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,128,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,128,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,128,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,0,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,64,0,1,fp8,fp8,0,0.03711466739575068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,128,1,float16,float16,0,0.026015999416510265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,128,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,64,0,1,fp8,fp8,0,0.025663999219735462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,0,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,128,1,float16,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,128,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,0,1,float16,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,64,0,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,0,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,128,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,128,1,fp8,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,64,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,128,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,0,1,float16,float16,0,0.02590399980545044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,128,1,float16,fp8,0,0.02693866689999898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,64,0,1,float16,fp8,0,0.025861332813898723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,128,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,0,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,64,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,128,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,0,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,128,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,64,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,128,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,128,1,float16,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,0,1,float16,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,64,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,128,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,128,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,128,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,64,0,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,128,1,fp8,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,128,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,128,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,0,1,float16,float16,0,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,64,0,1,fp8,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,128,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,128,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,64,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,0,1,float16,float16,0,0.01618133361140887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,128,1,float16,fp8,0,0.01621333385507266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,0,1,float16,float16,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,128,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,64,0,1,fp8,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,128,1,fp8,fp8,0,0.01617066686352094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,128,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,128,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,128,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,128,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,128,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,64,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,64,0,1,fp8,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,128,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,64,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,128,1,float16,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,128,1,float16,float16,0,0.14008532961209616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,0,1,float16,float16,0,0.13994666934013367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,128,1,float16,fp8,0,0.14004266262054443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,128,1,fp8,fp8,0,0.13030399878819784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,0,1,float16,fp8,0,0.1395680010318756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,64,0,1,fp8,fp8,0,0.12990400195121765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,128,1,float16,float16,0,0.140255997578303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,0,1,float16,float16,0,0.13858133554458618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,128,1,float16,fp8,0,0.13959999879201254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,128,1,fp8,fp8,0,0.13083733121554056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,0,1,float16,fp8,0,0.13852799932161966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,64,0,1,fp8,fp8,0,0.13198933005332947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,128,1,float16,float16,0,0.1388800044854482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,0,1,float16,float16,0,0.1381706694761912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,128,1,fp8,fp8,0,0.13219733039538065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,0,1,float16,fp8,0,0.13898133238156637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,128,1,float16,fp8,0,0.14099199573198953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,64,0,1,fp8,fp8,0,0.12997333208719888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,128,1,float16,float16,0,0.14126400152842203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,0,1,float16,float16,0,0.14032533764839172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,128,1,float16,fp8,0,0.1402720014254252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,128,1,fp8,fp8,0,0.13458133737246195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,0,1,float16,fp8,0,0.14011200269063315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,128,1,float16,float16,0,0.07884799937407176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,64,0,1,fp8,fp8,0,0.1338933308919271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,128,1,float16,fp8,0,0.0793333351612091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,0,1,float16,float16,0,0.07897600034872691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,128,1,fp8,fp8,0,0.07507200042406718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,0,1,float16,fp8,0,0.07843733330567677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,128,1,float16,float16,0,0.07645333309968312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,0,1,float16,float16,0,0.07749866445859273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,64,0,1,fp8,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,128,1,float16,fp8,0,0.07678399980068207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,128,1,fp8,fp8,0,0.07274133463700612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,0,1,float16,fp8,0,0.0775679995616277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,64,0,1,fp8,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,128,1,float16,float16,0,0.07660800218582153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,0,1,float16,float16,0,0.07785066465536754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,128,1,float16,fp8,0,0.07663999994595845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,128,1,fp8,fp8,0,0.07338133454322815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,0,1,float16,fp8,0,0.07696533203125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,64,0,1,fp8,fp8,0,0.0745600014925003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,128,1,float16,float16,0,0.07656000057856242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,128,1,float16,fp8,0,0.07754666606585185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,0,1,float16,float16,0,0.07709866762161255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,128,1,fp8,fp8,0,0.0724480003118515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,0,1,float16,fp8,0,0.07705600063006084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,64,0,1,fp8,fp8,0,0.07249600191911061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,128,1,float16,float16,0,0.07732800145943959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,0,1,float16,float16,0,0.07663466533025105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,128,1,float16,fp8,0,0.07684266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,128,1,fp8,fp8,0,0.07375999788443248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,0,1,float16,fp8,0,0.07678933441638947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,64,0,1,fp8,fp8,0,0.07377066711584727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,128,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,0,1,float16,float16,0,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,128,1,float16,fp8,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,128,1,fp8,fp8,0,0.0461760014295578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,0,1,float16,fp8,0,0.047824000318845115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,64,0,1,fp8,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,128,1,float16,float16,0,0.046394666035970054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,0,1,float16,float16,0,0.045935998360315956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,128,1,float16,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,0,1,float16,fp8,0,0.04614399870236715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,128,1,fp8,fp8,0,0.04446933170159658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,64,0,1,fp8,fp8,0,0.043765331308046974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,128,1,float16,float16,0,0.046240001916885376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,0,1,float16,float16,0,0.04595200220743815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,128,1,float16,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,128,1,fp8,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,0,1,fp8,fp8,0,0.043663998444875084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,64,0,1,float16,fp8,0,0.046309332052866616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,128,1,float16,float16,0,0.045925334095954895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,0,1,float16,float16,0,0.04574933151404063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,128,1,float16,fp8,0,0.047594666481018066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,128,1,fp8,fp8,0,0.04603200157483419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,0,1,float16,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,64,0,1,fp8,fp8,0,0.04588800172011057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,128,1,float16,float16,0,0.04580266773700714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,0,1,float16,float16,0,0.04644800225893656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,128,1,float16,fp8,0,0.04630400240421295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,0,1,float16,fp8,0,0.047007997830708824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,64,0,1,fp8,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,128,1,float16,float16,0,0.033258666594823204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,0,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,128,1,float16,fp8,0,0.03186666717131933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,128,1,fp8,fp8,0,0.031744000812371574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,0,1,float16,fp8,0,0.03211733450492223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,64,0,1,fp8,fp8,0,0.03162666658560435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,128,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,0,1,float16,float16,0,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,128,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,0,1,float16,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,128,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,64,0,1,fp8,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,0,1,float16,float16,0,0.03072533259789149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,128,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,128,1,float16,float16,0,0.03258133431275686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,128,1,fp8,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,0,1,float16,float16,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,64,0,1,fp8,fp8,0,0.029743999242782593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,128,1,float16,float16,0,0.0329120010137558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,128,1,float16,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,128,1,fp8,fp8,0,0.030410667260487873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,0,1,float16,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,64,0,1,fp8,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,128,1,float16,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,128,1,fp8,fp8,0,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,0,1,float16,fp8,0,0.03212266663710276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,64,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,128,1,float16,float16,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,0,1,float16,float16,0,0.02181333303451538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,128,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,64,0,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,128,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,0,1,float16,float16,0,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,128,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,128,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,64,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,128,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,0,1,float16,float16,0,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,64,0,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,128,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,128,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,0,1,float16,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,64,0,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,128,1,float16,float16,0,0.022090665996074677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,128,1,float16,fp8,0,0.022330666581789654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,64,0,1,fp8,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,0,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,128,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,64,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,128,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,128,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,0,1,float16,float16,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,128,1,float16,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,0,1,fp8,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,0,1,float16,float16,0,0.01803733284274737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,128,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,128,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,64,0,1,fp8,fp8,0,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,128,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,0,1,float16,float16,0,0.016528000434239704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,128,1,fp8,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,64,0,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,128,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,128,1,float16,float16,0,0.016415999581416447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,64,0,1,fp8,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,128,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,128,1,float16,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,128,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,64,0,1,fp8,fp8,0,0.015967999895413715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,128,1,float16,fp8,0,0.016554666062196095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,64,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,128,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,64,0,1,float16,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,128,1,float16,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,128,1,float16,float16,0,0.0162773331006368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,128,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,64,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,0,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,128,1,float16,float16,0,0.015813333292802174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,64,0,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,128,1,float16,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,128,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,0,1,fp8,fp8,0,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,64,0,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,128,1,float16,float16,0,0.12167466680208842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,0,1,float16,float16,0,0.122789333264033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,128,1,float16,fp8,0,0.1216266651948293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,128,1,fp8,fp8,0,0.11366933584213257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,0,1,float16,fp8,0,0.1226026713848114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,64,0,1,fp8,fp8,0,0.11376532912254333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,128,1,float16,float16,0,0.12024000287055969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,0,1,float16,float16,0,0.12243200341860454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,128,1,float16,fp8,0,0.12173333764076233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,128,1,fp8,fp8,0,0.11343466242154439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,0,1,float16,fp8,0,0.12218133608500163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,64,0,1,fp8,fp8,0,0.11377599835395813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,128,1,float16,float16,0,0.1213653286298116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,0,1,float16,float16,0,0.12191999951998393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,128,1,float16,fp8,0,0.12166933218638103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,128,1,fp8,fp8,0,0.11281067132949829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,0,1,float16,fp8,0,0.12175466616948445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,64,0,1,fp8,fp8,0,0.11353600025177002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,128,1,float16,float16,0,0.12195733189582825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,0,1,float16,float16,0,0.12185600399971008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,128,1,float16,fp8,0,0.12217066685358684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,128,1,fp8,fp8,0,0.11355200409889221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,0,1,fp8,fp8,0,0.1149173378944397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,64,0,1,float16,fp8,0,0.12242133418718974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,128,1,float16,float16,0,0.06852266689141591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,0,1,float16,float16,0,0.0692799985408783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,128,1,float16,fp8,0,0.0705813318490982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,128,1,fp8,fp8,0,0.06619200110435486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,0,1,float16,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,64,0,1,fp8,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,128,1,float16,float16,0,0.06874133149782817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,0,1,float16,float16,0,0.06656533479690552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,128,1,float16,fp8,0,0.06740800042947133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,128,1,fp8,fp8,0,0.06451199948787689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,0,1,float16,fp8,0,0.06881600121657054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,64,0,1,fp8,fp8,0,0.06393600006898244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,128,1,float16,float16,0,0.06875733534495036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,0,1,float16,float16,0,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,128,1,float16,fp8,0,0.06860800087451935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,128,1,fp8,fp8,0,0.0640533318122228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,0,1,float16,fp8,0,0.06877866884072621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,64,0,1,fp8,fp8,0,0.06474666794141133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,0,1,float16,float16,0,0.06859200199445088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,128,1,float16,float16,0,0.0691786656777064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,128,1,float16,fp8,0,0.06891199946403503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,128,1,fp8,fp8,0,0.06471466521422069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,0,1,float16,fp8,0,0.06843199829260509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,64,0,1,fp8,fp8,0,0.06673599779605865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,128,1,float16,float16,0,0.0682826687892278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,0,1,float16,float16,0,0.06821866830190022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,128,1,float16,fp8,0,0.06837333242098491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,128,1,fp8,fp8,0,0.06683733562628429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,128,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,0,1,float16,fp8,0,0.06876799960931142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,64,0,1,fp8,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,0,1,float16,float16,0,0.044064000248909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,128,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,0,1,float16,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,128,1,fp8,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,64,0,1,fp8,fp8,0,0.04208533465862274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,128,1,float16,float16,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,0,1,float16,float16,0,0.043840001026789345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,128,1,float16,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,128,1,fp8,fp8,0,0.04041066765785217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,0,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,64,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,128,1,float16,float16,0,0.04219200213750204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,128,1,float16,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,128,1,fp8,fp8,0,0.04028266668319702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,0,1,float16,fp8,0,0.042026668787002563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,64,0,1,fp8,fp8,0,0.04154666761557261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,128,1,float16,float16,0,0.044165333112080894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,0,1,float16,float16,0,0.041759997606277466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,128,1,float16,fp8,0,0.0421013335386912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,128,1,fp8,fp8,0,0.04168533285458883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,64,0,1,fp8,fp8,0,0.04005333284536997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,128,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,0,1,float16,float16,0,0.04369066655635834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,128,1,float16,fp8,0,0.04221333563327789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,128,1,fp8,fp8,0,0.041296000281969704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,0,1,float16,fp8,0,0.04414399961630503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,128,1,float16,float16,0,0.027690666417280834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,64,0,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,0,1,float16,float16,0,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,128,1,float16,fp8,0,0.02810666710138321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,128,1,fp8,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,64,0,1,fp8,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,128,1,float16,float16,0,0.02777066578467687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,0,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,128,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,128,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,0,1,float16,fp8,0,0.028677334388097126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,128,1,float16,float16,0,0.0277813325325648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,0,1,float16,float16,0,0.027722666660944622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,128,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,128,1,fp8,fp8,0,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,0,1,float16,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,64,0,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,0,1,float16,float16,0,0.02788266787926356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,128,1,float16,float16,0,0.0277813325325648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,128,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,0,1,fp8,fp8,0,0.026181332767009735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,128,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,64,0,1,float16,fp8,0,0.02869333326816559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,128,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,0,1,float16,float16,0,0.028704000016053517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,128,1,float16,fp8,0,0.02850666642189026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,128,1,fp8,fp8,0,0.026687999566396076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,64,0,1,fp8,fp8,0,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,0,1,float16,float16,0,0.021759999295075733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,128,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,128,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,0,1,float16,fp8,0,0.021759999295075733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,64,0,1,fp8,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,64,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,128,1,float16,fp8,0,0.021840001145998638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,128,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,0,1,float16,fp8,0,0.020853333175182343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,128,1,float16,float16,0,0.019978666057189304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,64,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,0,1,float16,float16,0,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,128,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,128,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,128,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,128,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,128,1,float16,float16,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,64,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,0,1,float16,float16,0,0.01838933303952217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,64,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,128,1,float16,float16,0,0.01781333362062772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,128,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,64,0,1,fp8,fp8,0,0.01806933308641116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,128,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,0,1,float16,float16,0,0.017605333278576534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,128,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,128,1,fp8,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,64,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,128,1,float16,float16,0,0.0180479995906353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,128,1,float16,fp8,0,0.01798933371901512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,128,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,64,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,128,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,128,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,0,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,128,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,128,1,fp8,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,0,1,float16,fp8,0,0.016575999557971954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,128,1,float16,float16,0,0.015802666544914246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,128,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,64,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,128,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,128,1,float16,float16,0,0.01599466676513354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,64,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,128,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,128,1,fp8,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,128,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,128,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,128,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,128,1,float16,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,128,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,128,1,float16,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,64,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,128,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,64,0,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,128,1,float16,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,128,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,128,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,128,1,float16,float16,0,0.105295995871226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,0,1,float16,float16,0,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,128,1,float16,fp8,0,0.10538666447003682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,128,1,fp8,fp8,0,0.09713600079218547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,0,1,fp8,fp8,0,0.09522133072217305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,64,0,1,float16,fp8,0,0.10668800274531047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,128,1,float16,float16,0,0.10668266812960307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,0,1,float16,float16,0,0.10549867153167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,128,1,fp8,fp8,0,0.09734400113423665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,128,1,float16,fp8,0,0.10637866457303365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,0,1,float16,fp8,0,0.10553066929181416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,64,0,1,fp8,fp8,0,0.09717866778373718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,128,1,float16,float16,0,0.10617599884668986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,0,1,float16,float16,0,0.10542399684588115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,128,1,float16,fp8,0,0.10599467158317566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,0,1,float16,fp8,0,0.10559466481208801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,128,1,fp8,fp8,0,0.09799999992052714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,64,0,1,fp8,fp8,0,0.09732266267140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,128,1,float16,float16,0,0.105295995871226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,128,1,float16,fp8,0,0.10758399963378906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,0,1,float16,float16,0,0.10602133472760518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,128,1,fp8,fp8,0,0.09525866309801738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,0,1,float16,fp8,0,0.10779733459154765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,64,0,1,fp8,fp8,0,0.09657067060470581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,128,1,float16,float16,0,0.06022400160630544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,0,0.05982399980227152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,128,1,float16,fp8,0,0.06006399790445963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,128,1,fp8,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,64,0,1,fp8,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,128,1,float16,float16,0,0.060234665870666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,0,1,float16,float16,0,0.06070933242638906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,128,1,float16,fp8,0,0.059301331639289856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,128,1,fp8,fp8,0,0.0558186670144399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,0,1,float16,fp8,0,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,64,0,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,128,1,float16,float16,0,0.06053866446018219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,128,1,float16,fp8,0,0.05938666562239329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,0,1,float16,float16,0,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,128,1,fp8,fp8,0,0.056549335519472756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,0,1,float16,fp8,0,0.059936001896858215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,64,0,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,128,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,0,1,float16,float16,0,0.059994667768478394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,128,1,float16,fp8,0,0.060496002435684204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,0,1,float16,fp8,0,0.060309335589408875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,128,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,64,0,1,fp8,fp8,0,0.05578133463859558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,128,1,float16,float16,0,0.06019733349482218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,0,1,float16,float16,0,0.06057600180308024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,128,1,fp8,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,128,1,float16,fp8,0,0.06105599800745646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,0,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,64,0,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,128,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,0,0.037962667644023895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,128,1,float16,fp8,0,0.03800000001986822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,128,1,fp8,fp8,0,0.0359946663180987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,64,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,128,1,float16,float16,0,0.037946666280428566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,0,1,float16,float16,0,0.038373333712418876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,128,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,0,1,float16,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,128,1,fp8,fp8,0,0.035973332822322845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,128,1,float16,float16,0,0.03738666574160258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,64,0,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,0,1,float16,float16,0,0.03812266637881597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,128,1,float16,fp8,0,0.038015998899936676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,128,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,0,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,64,0,1,fp8,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,128,1,float16,float16,0,0.03786666691303253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,0,1,float16,float16,0,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,128,1,float16,fp8,0,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,128,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,0,1,float16,fp8,0,0.038032000263532005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,64,0,1,fp8,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,128,1,float16,float16,0,0.03796799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,0,1,float16,float16,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,128,1,float16,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,128,1,fp8,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,64,0,1,fp8,fp8,0,0.03606933355331421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,128,1,float16,fp8,0,0.02632533262173335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,64,0,1,fp8,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,128,1,float16,float16,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,0,1,float16,float16,0,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,128,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,128,1,fp8,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,0,1,fp8,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,64,0,1,float16,fp8,0,0.026176000634829204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,128,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,0,1,float16,float16,0,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,0,1,float16,fp8,0,0.02661866694688797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,128,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,64,0,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,128,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,64,0,1,fp8,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,128,1,float16,float16,0,0.02717333287000656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,0,1,float16,float16,0,0.02641066660483678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,128,1,float16,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,128,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,64,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,128,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,128,1,float16,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,128,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,0,1,float16,float16,0,0.020474666108687718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,128,1,fp8,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,128,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,0,1,float16,float16,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,0,1,float16,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,64,0,1,fp8,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,0,1,float16,float16,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,128,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,128,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,128,1,float16,float16,0,0.01817600056529045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,64,0,1,fp8,fp8,0,0.020874666670958202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,128,1,fp8,fp8,0,0.018650667121013004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,0,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,0,1,float16,fp8,0,0.01793066660563151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,0,1,float16,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,128,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,128,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,128,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,64,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,128,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,0,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,64,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,128,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,128,1,float16,fp8,0,0.016384000579516094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,128,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,0,1,float16,fp8,0,0.01632533346613248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,64,0,1,fp8,fp8,0,0.018090666582187016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,128,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,64,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,128,1,float16,float16,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,128,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,64,0,1,fp8,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,128,1,fp8,fp8,0,0.01618133361140887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,0,1,float16,float16,0,0.016208000481128693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,128,1,float16,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,64,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,128,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,128,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,128,1,float16,float16,0,0.016000000139077503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,128,1,float16,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,0,1,float16,fp8,0,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,64,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,128,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,128,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,128,1,float16,float16,0,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,64,0,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,float16,0,0.8081013361612955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,128,1,float16,fp8,0,0.8161546389261881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,128,1,fp8,fp8,0,0.7607999642690023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,float16,0,0.8218932946523031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,float16,0,4.303903897603353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,128,1,float16,fp8,0,0.8291733264923096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,0,1,float16,fp8,0,4.301130612691243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,64,0,1,fp8,fp8,0,3.7133334477742515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,128,1,fp8,fp8,0,0.7803146839141846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,float16,0,0.8390080134073893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,float16,0,4.320570627848308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,128,1,float16,fp8,0,0.8454773426055908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,0,1,fp8,fp8,0,3.731840133666992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,64,0,1,float16,fp8,0,4.319077491760254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,128,1,fp8,fp8,0,0.7993120352427164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,float16,0,0.8624160289764404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,float16,0,4.337727864583333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,128,1,float16,fp8,0,0.8730080127716064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,0,1,fp8,fp8,0,3.751381238301595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,64,0,1,float16,fp8,0,4.348405202229817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,128,1,fp8,fp8,0,0.8315412998199463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,float16,0,0.48442665735880536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,float16,0,4.374810536702474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,128,1,float16,fp8,0,0.49633065859476727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,128,1,fp8,fp8,0,0.4726933240890503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,float16,0,2.2863200505574546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,0,1,fp8,fp8,0,3.793717384338379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,64,0,1,float16,fp8,0,4.37442143758138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,float16,0,0.42896533012390137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,128,1,float16,fp8,0,0.43375468254089355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,0,1,float16,fp8,0,2.2983412742614746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,64,0,1,fp8,fp8,0,1.9943359692891438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,128,1,fp8,fp8,0,0.4067466656366984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,float16,0,2.214618682861328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,float16,0,0.432645320892334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,0,1,fp8,fp8,0,1.9229547182718914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,64,0,1,float16,fp8,0,2.2241652806599936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,128,1,float16,fp8,0,0.43846933046976727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,128,1,fp8,fp8,0,0.4140746593475342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,float16,0,2.2209226290384927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,float16,0,0.440341313680013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,0,1,fp8,fp8,0,1.9309600194295247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,64,0,1,float16,fp8,0,2.232565402984619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,128,1,float16,fp8,0,0.4472106695175171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,128,1,fp8,fp8,0,0.42315200964609784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,float16,0,2.2309707005818686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,float16,0,0.45420801639556885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,0,1,fp8,fp8,0,1.9435946146647136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,64,0,1,float16,fp8,0,2.237941265106201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,128,1,float16,fp8,0,0.463642676671346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,128,1,fp8,fp8,0,0.43756266434987384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,float16,0,2.2488320668538413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,float16,0,0.273306667804718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,128,1,float16,fp8,0,0.28101332982381183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,0,1,float16,fp8,0,2.2569120724995932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,64,0,1,fp8,fp8,0,1.9565973281860352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,128,1,fp8,fp8,0,0.27163734038670856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,float16,0,1.2150399684906006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,float16,0,0.24495466550191244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,128,1,float16,fp8,0,0.2470453381538391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,0,1,float16,fp8,0,1.2228319644927979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,64,0,1,fp8,fp8,0,1.068559964497884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,float16,0,1.175546646118164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,128,1,fp8,fp8,0,0.23662932713826498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,float16,0,0.24677334229151407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,0,1,fp8,fp8,0,1.0297013123830159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,128,1,float16,fp8,0,0.24974934260050455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,64,0,1,float16,fp8,0,1.1786613464355469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,128,1,fp8,fp8,0,0.24107199907302856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,float16,0,1.1816693147023518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,float16,0,0.2532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,0,1,fp8,fp8,0,1.0343200365702312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,64,0,1,float16,fp8,0,1.1833066940307617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,128,1,float16,fp8,0,0.2555786569913228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,128,1,fp8,fp8,0,0.24476265907287598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,float16,0,1.1859359741210938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,float16,0,0.25893332560857135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,0,1,float16,fp8,0,1.1912480195363362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,64,0,1,fp8,fp8,0,1.039178689320882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,128,1,float16,fp8,0,0.2633226712544759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,128,1,fp8,fp8,0,0.2523786624272664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,float16,0,1.195583979288737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,float16,0,0.19548799594243368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,0,1,fp8,fp8,0,1.0482880274454753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,128,1,float16,fp8,0,0.19550400972366333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,64,0,1,float16,fp8,0,1.201152006785075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,float16,0,0.7068053086598715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,128,1,fp8,fp8,0,0.18734399477640787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,float16,0,0.19128000736236572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,0,1,fp8,fp8,0,0.6219626665115356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,64,0,1,float16,fp8,0,0.706986665725708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,128,1,float16,fp8,0,0.1917653282483419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,float16,0,0.7009759744008383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,128,1,fp8,fp8,0,0.18492799997329712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,float16,0,0.19316265980402628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,0,1,float16,fp8,0,0.6989759604136149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,64,0,1,fp8,fp8,0,0.6144853432973226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,float16,0,0.6993652979532877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,128,1,fp8,fp8,0,0.18550399939219156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,128,1,float16,fp8,0,0.19350399573644003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,float16,0,0.19164266188939413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,0,1,float16,fp8,0,0.6997120380401611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,64,0,1,fp8,fp8,0,0.61353067557017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,float16,0,0.7019360065460205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,128,1,float16,fp8,0,0.19193599621454874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,128,1,fp8,fp8,0,0.1851360003153483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,float16,0,0.19339734315872192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,0,1,float16,fp8,0,0.7012373606363932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,64,0,1,fp8,fp8,0,0.6176586548487345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,128,1,float16,fp8,0,0.19261332352956137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,float16,0,0.7034506797790527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,128,1,fp8,fp8,0,0.18517333269119263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,0,1,float16,fp8,0,0.7034080028533936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,64,0,1,fp8,fp8,0,0.6188160181045532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,float16,0,0.6076480150222778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,128,1,fp8,fp8,0,0.5708053509394327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,128,1,float16,fp8,0,0.6137066682179769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,float16,0,0.6145546833674113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,float16,0,2.568453311920166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,0,1,float16,fp8,0,2.5740159352620444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,128,1,float16,fp8,0,0.6198720137278239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,64,0,1,fp8,fp8,0,2.2294559478759766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,128,1,fp8,fp8,0,0.5819360017776489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,float16,0,2.573296070098877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,float16,0,0.6241226593653361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,128,1,float16,fp8,0,0.6337440013885498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,0,1,fp8,fp8,0,2.2416693369547525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,64,0,1,float16,fp8,0,2.582922617594401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,128,1,fp8,fp8,0,0.5971839825312296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,float16,0,2.587168057759603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,float16,0,0.6457546552022299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,0,1,fp8,fp8,0,2.25817600886027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,64,0,1,float16,fp8,0,2.5989813804626465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,128,1,float16,fp8,0,0.6555840174357096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,128,1,fp8,fp8,0,0.622650663057963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,float16,0,2.611290613810221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,float16,0,0.36800531546274823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,0,1,float16,fp8,0,2.6273652712504068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,64,0,1,fp8,fp8,0,2.284560044606527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,128,1,float16,fp8,0,0.37797868251800537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,128,1,fp8,fp8,0,0.3604213396708171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,float16,0,1.3872159322102864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,float16,0,0.326746662457784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,0,1,float16,fp8,0,1.4152000745137532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,64,0,1,fp8,fp8,0,1.2212586402893066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,128,1,float16,fp8,0,0.3304533362388611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,128,1,fp8,fp8,0,0.31250133117039997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,float16,0,1.3348639806111653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,float16,0,0.33085334300994873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,0,1,fp8,fp8,0,1.1654773553212483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,64,0,1,float16,fp8,0,1.3401920000712078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,128,1,float16,fp8,0,0.3347253402074178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,128,1,fp8,fp8,0,0.31566933790842694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,float16,0,1.341983954111735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,float16,0,0.33478931585947674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,128,1,float16,fp8,0,0.33952534198760986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,0,1,fp8,fp8,0,1.1696213086446126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,64,0,1,float16,fp8,0,1.345647970835368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,128,1,fp8,fp8,0,0.3222879966100057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,float16,0,1.3452320098876953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,float16,0,0.34567999839782715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,0,1,fp8,fp8,0,1.1824533144632976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,64,0,1,float16,fp8,0,1.3528532981872559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,128,1,float16,fp8,0,0.3520853519439697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,128,1,fp8,fp8,0,0.3342506488164266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,float16,0,1.3585492769877117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,float16,0,0.21022399266560873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,0,1,float16,fp8,0,1.3678399721781414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,64,0,1,fp8,fp8,0,1.1895840167999268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,128,1,float16,fp8,0,0.21629865964253744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,float16,0,0.750490665435791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,128,1,fp8,fp8,0,0.2078346610069275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,float16,0,0.18668266137441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,0,1,fp8,fp8,0,0.6648746728897095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,64,0,1,float16,fp8,0,0.7585493723551432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,128,1,float16,fp8,0,0.18831467628479004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,float16,0,0.7205279668172201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,128,1,fp8,fp8,0,0.18255466222763062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,float16,0,0.18778133392333984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,0,1,float16,fp8,0,0.7215413252512614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,64,0,1,fp8,fp8,0,0.6365813414255778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,128,1,float16,fp8,0,0.19114667177200317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,float16,0,0.7231040000915527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,128,1,fp8,fp8,0,0.18345065911610922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,float16,0,0.19307732582092285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,0,1,float16,fp8,0,0.7241706848144531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,64,0,1,fp8,fp8,0,0.6363840103149414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,128,1,float16,fp8,0,0.19525333245595297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,float16,0,0.7285813490549723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,128,1,fp8,fp8,0,0.1892533302307129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,float16,0,0.19962666432062784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,0,1,fp8,fp8,0,0.6415306727091471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,64,0,1,float16,fp8,0,0.7301759719848633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,128,1,float16,fp8,0,0.20382932821909586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,float16,0,0.7380053202311198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,128,1,fp8,fp8,0,0.193557341893514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,float16,0,0.1527359982331594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,0,1,float16,fp8,0,0.7401333649953207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,64,0,1,fp8,fp8,0,0.6499466498692831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,128,1,float16,fp8,0,0.15239999691645303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,float16,0,0.45318933327992755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,128,1,fp8,fp8,0,0.14640532930692038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,float16,0,0.1483680009841919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,0,1,fp8,fp8,0,0.4002133210500081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,64,0,1,float16,fp8,0,0.4537706772486369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,128,1,float16,fp8,0,0.14974932869275412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,float16,0,0.4450133244196574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,128,1,fp8,fp8,0,0.1429333289464315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,float16,0,0.14847999811172485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,0,1,float16,fp8,0,0.447434663772583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,64,0,1,fp8,fp8,0,0.3938399950663249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,128,1,float16,fp8,0,0.14883200327555338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,float16,0,0.44713600476582843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,128,1,fp8,fp8,0,0.14242666959762573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,0,1,float16,fp8,0,0.446176012357076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,float16,0,0.1488053301970164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,64,0,1,fp8,fp8,0,0.39212266604105633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,float16,0,0.44536534945170086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,128,1,float16,fp8,0,0.15027733643849692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,128,1,fp8,fp8,0,0.14547200004259744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,float16,0,0.15031466881434122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,0,1,float16,fp8,0,0.44539201259613037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,64,0,1,fp8,fp8,0,0.39443735281626385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,float16,0,0.4493066469828288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,128,1,float16,fp8,0,0.1500640014807383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,128,1,fp8,fp8,0,0.14626666903495789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,0,1,float16,fp8,0,0.4482666651407878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,64,0,1,fp8,fp8,0,0.39873600006103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,float16,0,0.5062133471171061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,128,1,float16,fp8,0,0.5118346611658732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,128,1,fp8,fp8,0,0.4787733157475789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,float16,0,1.868117332458496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,float16,0,0.5128586689631144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,0,1,float16,fp8,0,1.8712693850199382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,128,1,float16,fp8,0,0.5184959967931112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,64,0,1,fp8,fp8,0,1.6306079228719075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,128,1,fp8,fp8,0,0.4858826796213786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,float16,0,1.8733493487040203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,float16,0,0.52183465162913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,0,1,float16,fp8,0,1.87827730178833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,64,0,1,fp8,fp8,0,1.6311786969502766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,128,1,float16,fp8,0,0.5274666547775269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,128,1,fp8,fp8,0,0.4975359837214152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,float16,0,1.8845866521199544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,float16,0,0.5376586516698202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,0,1,float16,fp8,0,1.8911733627319336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,64,0,1,fp8,fp8,0,1.6462133725484211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,128,1,float16,fp8,0,0.5463093519210815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,128,1,fp8,fp8,0,0.5170773267745972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,float16,0,1.906511942545573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,float16,0,0.30870399872461957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,0,1,fp8,fp8,0,1.6653706232706706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,64,0,1,float16,fp8,0,1.9190452893575032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,128,1,float16,fp8,0,0.3158506751060486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,float16,0,1.0221920013427734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,128,1,fp8,fp8,0,0.30237332979838055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,float16,0,0.27026132742563885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,0,1,fp8,fp8,0,0.9031039873758951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,64,0,1,float16,fp8,0,1.030618667602539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,128,1,float16,fp8,0,0.2736000021298726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,128,1,fp8,fp8,0,0.26090667645136517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,float16,0,0.9773440361022949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,float16,0,0.27380265792210895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,0,1,float16,fp8,0,0.9799199899037679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,64,0,1,fp8,fp8,0,0.857151985168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,128,1,float16,fp8,0,0.2770666678746541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,float16,0,0.9796693325042725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,128,1,fp8,fp8,0,0.26522133747736615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,float16,0,0.28036266565322876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,0,1,fp8,fp8,0,0.8620320161183676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,64,0,1,float16,fp8,0,0.9830559889475504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,128,1,float16,fp8,0,0.283680001894633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,float16,0,0.9886240164438883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,128,1,fp8,fp8,0,0.2702346642812093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,float16,0,0.2896266579627991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,0,1,float16,fp8,0,0.9907253583272299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,64,0,1,fp8,fp8,0,0.8693973223368326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,128,1,float16,fp8,0,0.2940533359845479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,float16,0,0.9985333283742269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,128,1,fp8,fp8,0,0.2796799937884013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,float16,0,0.17900800704956055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,0,1,float16,fp8,0,1.00382399559021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,128,1,float16,fp8,0,0.18294399976730347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,float16,0,0.5598613421122233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,64,0,1,fp8,fp8,0,0.8793813387552897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,128,1,fp8,fp8,0,0.17702933152516684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,float16,0,0.1548746625582377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,0,1,fp8,fp8,0,0.4997386535008748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,64,0,1,float16,fp8,0,0.5641440153121948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,128,1,float16,fp8,0,0.1568106710910797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,float16,0,0.5340799887975057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,128,1,fp8,fp8,0,0.15245333313941956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,0,1,float16,fp8,0,0.5340693394343058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,float16,0,0.15758933623631796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,64,0,1,fp8,fp8,0,0.4708373149236043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,float16,0,0.5328373511632284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,128,1,float16,fp8,0,0.1588266690572103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,128,1,fp8,fp8,0,0.15596800049146017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,float16,0,0.1609173317750295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,0,1,float16,fp8,0,0.537829319636027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,64,0,1,fp8,fp8,0,0.4731573263804118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,float16,0,0.5398399829864502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,128,1,float16,fp8,0,0.16239466269810995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,128,1,fp8,fp8,0,0.15903466939926147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,float16,0,0.16694400707880655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,0,1,float16,fp8,0,0.5410879850387573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,64,0,1,fp8,fp8,0,0.48050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,128,1,float16,fp8,0,0.17098132769266763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,float16,0,0.5468053420384725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,128,1,fp8,fp8,0,0.16501333316167197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,float16,0,0.1300373375415802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,0,1,float16,fp8,0,0.5494933525721232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,float16,0,0.3449973265329997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,64,0,1,fp8,fp8,0,0.4859626690546672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,128,1,float16,fp8,0,0.1301706631978353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,128,1,fp8,fp8,0,0.12572266658147177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,float16,0,0.12795733412106833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,0,1,float16,fp8,0,0.34401599566141766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,64,0,1,fp8,fp8,0,0.30612266063690186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,float16,0,0.3412586847941081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,128,1,fp8,fp8,0,0.121888001759847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,128,1,float16,fp8,0,0.12803733348846436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,0,1,float16,fp8,0,0.3391520182291667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,float16,0,0.12753599882125854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,64,0,1,fp8,fp8,0,0.3022666573524475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,128,1,float16,fp8,0,0.1258080005645752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,float16,0,0.34055999914805096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,128,1,fp8,fp8,0,0.12286933263142903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,float16,0,0.12758400042851767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,0,1,float16,fp8,0,0.3410773277282715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,64,0,1,fp8,fp8,0,0.3004960020383199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,float16,0,0.3397013346354167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,128,1,float16,fp8,0,0.12800000111262003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,128,1,fp8,fp8,0,0.12184533476829529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,0,1,float16,fp8,0,0.34061865011850995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,float16,0,0.12809066971143088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,64,0,1,fp8,fp8,0,0.30220266183217365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,128,1,float16,fp8,0,0.1300373375415802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,128,1,fp8,fp8,0,0.12365333239237468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,float16,0,0.3406453529993693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,0,1,fp8,fp8,0,0.30346133311589557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,64,0,1,float16,fp8,0,0.34282131989796955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,float16,0,0.7865920066833496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,128,1,float16,fp8,0,0.7940373420715332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,128,1,fp8,fp8,0,0.7397813002268473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,float16,0,2.48087469736735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,float16,0,0.7984800338745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,0,1,float16,fp8,0,2.4867893854777017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,64,0,1,fp8,fp8,0,2.162720044453939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,128,1,float16,fp8,0,0.8068693478902181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,128,1,fp8,fp8,0,0.7567359606424967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,float16,0,2.496453285217285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,float16,0,0.8177653153737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,0,1,fp8,fp8,0,2.181370735168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,64,0,1,float16,fp8,0,2.5046186447143555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,128,1,float16,fp8,0,0.8271520137786865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,128,1,fp8,fp8,0,0.7773386637369791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,float16,0,2.518341382344564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,float16,0,0.8426720301310221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,0,1,float16,fp8,0,2.5250879923502603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,128,1,float16,fp8,0,0.8529919783274332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,64,0,1,fp8,fp8,0,2.2059359550476074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,128,1,fp8,fp8,0,0.8088640371958414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,float16,0,2.5514559745788574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,float16,0,0.4619733492533366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,128,1,float16,fp8,0,0.4729599952697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,0,1,fp8,fp8,0,2.2374134063720703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,float16,0,1.3436266581217449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,64,0,1,float16,fp8,0,2.5596532821655273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,128,1,fp8,fp8,0,0.45069865385691327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,float16,0,0.40651734670003253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,0,1,fp8,fp8,0,1.1911413669586182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,128,1,float16,fp8,0,0.411082665125529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,64,0,1,float16,fp8,0,1.3512320518493652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,float16,0,1.2731359799702961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,128,1,fp8,fp8,0,0.3854293425877889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,float16,0,0.41227734088897705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,0,1,fp8,fp8,0,1.1155040264129639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,64,0,1,float16,fp8,0,1.2785546779632568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,128,1,float16,fp8,0,0.4167519807815552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,float16,0,1.2797760168711345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,128,1,fp8,fp8,0,0.3919413487116496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,float16,0,0.419269323348999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,0,1,float16,fp8,0,1.28494397799174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,64,0,1,fp8,fp8,0,1.123146692911784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,128,1,float16,fp8,0,0.42484267552693683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,float16,0,1.2902026971181233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,128,1,fp8,fp8,0,0.400762677192688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,float16,0,0.43162667751312256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,0,1,fp8,fp8,0,1.1308800379435222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,64,0,1,float16,fp8,0,1.295349359512329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,128,1,float16,fp8,0,0.438480019569397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,float16,0,1.306063969930013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,128,1,fp8,fp8,0,0.4150506655375163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,float16,0,0.2495573361714681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,128,1,float16,fp8,0,0.2567360003789266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,0,1,float16,fp8,0,1.313109318415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,float16,0,0.7098346551259359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,64,0,1,fp8,fp8,0,1.1498400370279949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,128,1,fp8,fp8,0,0.24514132738113403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,float16,0,0.21960532665252686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,0,1,fp8,fp8,0,0.6336906750996908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,64,0,1,float16,fp8,0,0.7166826725006104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,128,1,float16,fp8,0,0.22156800826390585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,float16,0,0.6727519830067953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,128,1,fp8,fp8,0,0.21396799882253012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,float16,0,0.22223466634750366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,0,1,float16,fp8,0,0.6746079921722412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,64,0,1,fp8,fp8,0,0.5957280000050863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,float16,0,0.6742293039957682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,128,1,float16,fp8,0,0.22421866655349731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,128,1,fp8,fp8,0,0.2159573237101237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,float16,0,0.22779732942581177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,0,1,float16,fp8,0,0.6801013151804606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,64,0,1,fp8,fp8,0,0.5976373354593912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,128,1,float16,fp8,0,0.2303946614265442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,float16,0,0.6830879847208658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,128,1,fp8,fp8,0,0.22061334053675333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,float16,0,0.23565866549809775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,0,1,float16,fp8,0,0.6861653327941895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,64,0,1,fp8,fp8,0,0.6035679976145426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,float16,0,0.6920693715413412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,128,1,float16,fp8,0,0.23905066649119058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,128,1,fp8,fp8,0,0.22849067052205405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,float16,0,0.14443733294804892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,0,1,float16,fp8,0,0.6956906318664551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,float16,0,0.3952426513036092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,64,0,1,fp8,fp8,0,0.6135626633961996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,128,1,float16,fp8,0,0.14843199650446573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,128,1,fp8,fp8,0,0.1441386640071869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,float16,0,0.1260479986667633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,0,1,float16,fp8,0,0.3991786638895671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,64,0,1,fp8,fp8,0,0.3558719952901204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,128,1,float16,fp8,0,0.12626133362452188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,float16,0,0.3729493220647176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,128,1,fp8,fp8,0,0.11948800086975098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,0,1,float16,fp8,0,0.37484268347422284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,float16,0,0.12587733070055643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,64,0,1,fp8,fp8,0,0.3279413382212321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,128,1,float16,fp8,0,0.12783466776212057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,float16,0,0.3755360047022502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,128,1,fp8,fp8,0,0.12321600317955017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,float16,0,0.12794666488965353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,0,1,float16,fp8,0,0.3742080132166545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,64,0,1,fp8,fp8,0,0.3316640059153239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,128,1,float16,fp8,0,0.12988799810409546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,128,1,fp8,fp8,0,0.1280693312486013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,float16,0,0.37717334429423016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,float16,0,0.13395733634630838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,0,1,float16,fp8,0,0.3783573309580485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,64,0,1,fp8,fp8,0,0.33850665887196857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,128,1,float16,fp8,0,0.13724266489346823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,128,1,fp8,fp8,0,0.13436266779899597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,float16,0,0.381930669148763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,float16,0,0.10549333691596985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,0,1,float16,fp8,0,0.3863946596781413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,64,0,1,fp8,fp8,0,0.3451626698176066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,128,1,float16,fp8,0,0.10574932893117268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,float16,0,0.2491626739501953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,128,1,fp8,fp8,0,0.10337066650390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,0,1,float16,fp8,0,0.24873065948486328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,64,0,1,fp8,fp8,0,0.22263999780019125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,float16,0,0.1036906639734904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,128,1,float16,fp8,0,0.1035040020942688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,128,1,fp8,fp8,0,0.1011946698029836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,float16,0,0.24728000164031982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,float16,0,0.103493332862854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,0,1,fp8,fp8,0,0.2200373411178589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,64,0,1,float16,fp8,0,0.2502400080362956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,128,1,float16,fp8,0,0.10526933272679646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,128,1,fp8,fp8,0,0.1013813316822052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,float16,0,0.24741333723068237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,float16,0,0.10522133111953735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,0,1,float16,fp8,0,0.2488159934679667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,64,0,1,fp8,fp8,0,0.22243734200795492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,128,1,float16,fp8,0,0.10313600301742554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,128,1,fp8,fp8,0,0.10139200091362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,float16,0,0.2485439976056417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,float16,0,0.1050986647605896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,0,1,float16,fp8,0,0.2481173276901245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,64,0,1,fp8,fp8,0,0.22050132354100546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,128,1,float16,fp8,0,0.10531199971834819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,128,1,fp8,fp8,0,0.1011786659558614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,float16,0,0.24837867418924967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,0,1,float16,fp8,0,0.2488159934679667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,64,0,1,fp8,fp8,0,0.2205866575241089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,float16,0,0.5895306666692098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,128,1,float16,fp8,0,0.5957653522491455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,128,1,fp8,fp8,0,0.5553013483683268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,float16,0,1.527722676595052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,float16,0,0.5994453430175781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,0,1,float16,fp8,0,1.5329227447509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,64,0,1,fp8,fp8,0,1.3404746055603027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,128,1,float16,fp8,0,0.6049226522445679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,128,1,fp8,fp8,0,0.5659519831339518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,float16,0,1.536688009897868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,float16,0,0.609007994333903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,0,1,float16,fp8,0,1.540602684020996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,64,0,1,fp8,fp8,0,1.3525759379069011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,128,1,float16,fp8,0,0.6154880126317342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,128,1,fp8,fp8,0,0.5824426809946696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,float16,0,1.5506186485290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,float16,0,0.6301386753718058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,0,1,float16,fp8,0,1.557136058807373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,64,0,1,fp8,fp8,0,1.3632853825887044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,128,1,float16,fp8,0,0.6378933191299438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,float16,0,1.5752746264139812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,128,1,fp8,fp8,0,0.6055839856465658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,float16,0,0.351306676864624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,0,1,float16,fp8,0,1.5853546460469563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,128,1,float16,fp8,0,0.35944000879923504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,float16,0,0.846682627995809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,64,0,1,fp8,fp8,0,1.3961706161499023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,128,1,fp8,fp8,0,0.34310932954152423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,float16,0,0.3091573317845662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,0,1,float16,fp8,0,0.8529067039489746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,64,0,1,fp8,fp8,0,0.7568586667378744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,128,1,float16,fp8,0,0.31251732508341473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,float16,0,0.7916426658630371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,128,1,fp8,fp8,0,0.2946400046348572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,float16,0,0.3134613235791524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,0,1,float16,fp8,0,0.7970026334126791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,64,0,1,fp8,fp8,0,0.7003839810689291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,float16,0,0.7973066965738932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,128,1,float16,fp8,0,0.31618134180704754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,128,1,fp8,fp8,0,0.2982400059700012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,float16,0,0.31990400950113934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,0,1,float16,fp8,0,0.7979093392690023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,64,0,1,fp8,fp8,0,0.7054453690846761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,128,1,float16,fp8,0,0.32339199384053546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,float16,0,0.802629311879476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,128,1,fp8,fp8,0,0.3063039978345235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,float16,0,0.33025066057840985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,0,1,float16,fp8,0,0.8079626560211182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,64,0,1,fp8,fp8,0,0.7104693253835043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,float16,0,0.8173706531524658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,128,1,float16,fp8,0,0.33575467268625897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,128,1,fp8,fp8,0,0.3163093328475952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,float16,0,0.19318399826685587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,0,1,fp8,fp8,0,0.726576010386149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,64,0,1,float16,fp8,0,0.8236052989959717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,float16,0,0.4537706772486369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,128,1,float16,fp8,0,0.19794134298960367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,128,1,fp8,fp8,0,0.19184533754984537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,float16,0,0.16668800512949625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,0,1,float16,fp8,0,0.4599253336588542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,64,0,1,fp8,fp8,0,0.4092799822489421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,128,1,float16,fp8,0,0.16876266400019327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,128,1,fp8,fp8,0,0.16285866498947144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,float16,0,0.42187734444936115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,float16,0,0.16902933518091837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,0,1,fp8,fp8,0,0.3784693479537964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,64,0,1,float16,fp8,0,0.42290135224660236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,128,1,fp8,fp8,0,0.1667520006497701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,128,1,float16,fp8,0,0.17121066649754843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,float16,0,0.424890677134196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,float16,0,0.1731040080388387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,0,1,fp8,fp8,0,0.3805866638819377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,64,0,1,float16,fp8,0,0.426581343015035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,128,1,float16,fp8,0,0.17697600523630777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,128,1,fp8,fp8,0,0.1709173321723938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,float16,0,0.4285920063654582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,float16,0,0.18174399932225546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,0,1,fp8,fp8,0,0.3866560061772664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,64,0,1,float16,fp8,0,0.4329119920730591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,128,1,float16,fp8,0,0.18479466438293457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,128,1,fp8,fp8,0,0.1773866613705953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,float16,0,0.43978134791056317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,float16,0,0.1125386655330658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,0,1,float16,fp8,0,0.44130667050679523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,64,0,1,fp8,fp8,0,0.3933813174565633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,128,1,float16,fp8,0,0.1153706709543864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,float16,0,0.25704532861709595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,128,1,fp8,fp8,0,0.11369066437085469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,float16,0,0.10122133294741313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,0,1,float16,fp8,0,0.2589919964472453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,64,0,1,fp8,fp8,0,0.23605332771937051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,128,1,float16,fp8,0,0.10114133358001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,128,1,fp8,fp8,0,0.09500267108281453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,float16,0,0.2411253253618876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,0,1,float16,fp8,0,0.24310400088628134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,float16,0,0.10105599959691365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,64,0,1,fp8,fp8,0,0.2134986718495687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,128,1,float16,fp8,0,0.10187733173370361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,float16,0,0.2427146633466085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,128,1,fp8,fp8,0,0.09638399879137675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,float16,0,0.10135466853777568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,0,1,float16,fp8,0,0.24273600180943808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,64,0,1,fp8,fp8,0,0.21665066480636597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,float16,0,0.2432533303896586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,128,1,float16,fp8,0,0.10174399614334106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,128,1,fp8,fp8,0,0.09916800260543823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,0,1,float16,fp8,0,0.2456106742223104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,64,0,1,fp8,fp8,0,0.21598933140436807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,float16,0,0.10506666700045268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,float16,0,0.2488373319307963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,128,1,float16,fp8,0,0.10590400298436482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,128,1,fp8,fp8,0,0.10527466734250386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,0,1,float16,fp8,0,0.24885332584381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,float16,0,0.08102933565775554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,64,0,1,fp8,fp8,0,0.22625599304835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,float16,0,0.16874132553736368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,128,1,float16,fp8,0,0.08086400230725606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,128,1,fp8,fp8,0,0.07886933286984761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,0,1,float16,fp8,0,0.16904000441233316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,64,0,1,fp8,fp8,0,0.15215999881426492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,float16,0,0.08121066788832347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,float16,0,0.16899732748667398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,128,1,float16,fp8,0,0.08130666613578796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,128,1,fp8,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,0,1,float16,fp8,0,0.16849066813786825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,64,0,1,fp8,fp8,0,0.15269333124160767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,float16,0,0.08096533517042796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,float16,0,0.1672853430112203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,128,1,float16,fp8,0,0.08257600168387096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,128,1,fp8,fp8,0,0.07892266909281413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,0,1,float16,fp8,0,0.16830933094024658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,64,0,1,fp8,fp8,0,0.15029333035151163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,float16,0,0.08197333415349324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,float16,0,0.1691946585973104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,128,1,float16,fp8,0,0.08082666496435802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,128,1,fp8,fp8,0,0.07852800190448761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,0,1,float16,fp8,0,0.16823466618855795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,float16,0,0.08283199866612752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,64,0,1,fp8,fp8,0,0.1506666640440623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,float16,0,0.16908800601959229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,128,1,float16,fp8,0,0.08073066671689351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,128,1,fp8,fp8,0,0.07888533174991608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,0,1,fp8,fp8,0,0.15204266707102457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,64,0,1,float16,fp8,0,0.16869866847991943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,float16,0,0.7776052951812744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,128,1,fp8,fp8,0,0.7315519650777181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,128,1,float16,fp8,0,0.7842559814453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,float16,0,1.5616532961527507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,float16,0,0.7931733131408691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,0,1,float16,fp8,0,1.5628426869710286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,64,0,1,fp8,fp8,0,1.3821706771850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,128,1,float16,fp8,0,0.7998879750569662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,128,1,fp8,fp8,0,0.747978687286377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,float16,0,1.576650619506836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,float16,0,0.809333324432373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,0,1,fp8,fp8,0,1.3978613217671711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,64,0,1,float16,fp8,0,1.5819093386332195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,128,1,float16,fp8,0,0.8157386779785156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,128,1,fp8,fp8,0,0.7666506767272949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,float16,0,1.5979785919189453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,float16,0,0.8380906581878662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,0,1,fp8,fp8,0,1.4173547426859539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,64,0,1,float16,fp8,0,1.604640007019043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,128,1,float16,fp8,0,0.8450613021850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,128,1,fp8,fp8,0,0.7993013064066569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,float16,0,1.6329065958658855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,float16,0,0.45397865772247314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,128,1,float16,fp8,0,0.4639466603597005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,float16,0,0.871514638264974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,0,1,fp8,fp8,0,1.4556694030761719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,64,0,1,float16,fp8,0,1.6407039960225422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,128,1,fp8,fp8,0,0.4416053295135498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,float16,0,0.396346648534139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,0,1,fp8,fp8,0,0.7846399943033854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,64,0,1,float16,fp8,0,0.8805973529815674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,float16,0,0.7960266272226969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,128,1,fp8,fp8,0,0.3757493495941162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,128,1,float16,fp8,0,0.4009173313776652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,0,1,float16,fp8,0,0.8006666501363119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,float16,0,0.40097065766652423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,64,0,1,fp8,fp8,0,0.7079359690348307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,128,1,float16,fp8,0,0.4072800079981486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,float16,0,0.8043893178304037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,128,1,fp8,fp8,0,0.3803360064824422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,float16,0,0.4110186497370402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,0,1,float16,fp8,0,0.8063413302103678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,64,0,1,fp8,fp8,0,0.7157813707987467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,float16,0,0.8110933303833008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,128,1,float16,fp8,0,0.4151839812596639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,128,1,fp8,fp8,0,0.38945599397023517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,0,1,float16,fp8,0,0.818943977355957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,float16,0,0.42316798369089764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,64,0,1,fp8,fp8,0,0.7240800062815348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,float16,0,0.829253355662028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,128,1,float16,fp8,0,0.42879998683929443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,128,1,fp8,fp8,0,0.4057066837946574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,float16,0,0.23911466201146445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,0,1,float16,fp8,0,0.8357120354970297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,float16,0,0.456773320833842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,128,1,float16,fp8,0,0.2448800007502238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,64,0,1,fp8,fp8,0,0.7429546515146891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,128,1,fp8,fp8,0,0.2363146742184957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,float16,0,0.20699199040730795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,0,1,fp8,fp8,0,0.4149973392486572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,64,0,1,float16,fp8,0,0.46433599789937335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,float16,0,0.4166613419850667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,128,1,float16,fp8,0,0.21010132630666098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,128,1,fp8,fp8,0,0.2016213337580363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,0,1,float16,fp8,0,0.420693318049113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,64,0,1,fp8,fp8,0,0.3778560161590576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,float16,0,0.21127466360727945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,float16,0,0.4188106854756673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,128,1,fp8,fp8,0,0.20432533820470175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,128,1,float16,fp8,0,0.21351999044418335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,0,1,float16,fp8,0,0.4229653278986613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,float16,0,0.21622933944066366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,64,0,1,fp8,fp8,0,0.38012266159057617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,float16,0,0.4280426502227783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,128,1,float16,fp8,0,0.22007467349370322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,128,1,fp8,fp8,0,0.2097866733868917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,0,1,float16,fp8,0,0.43005867799123126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,float16,0,0.22471467653910318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,64,0,1,fp8,fp8,0,0.3858666817347209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,float16,0,0.4365546703338623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,128,1,float16,fp8,0,0.22827200094858804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,128,1,fp8,fp8,0,0.2178773283958435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,float16,0,0.13245866696039835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,0,1,float16,fp8,0,0.44126399358113605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,float16,0,0.25064533948898315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,64,0,1,fp8,fp8,0,0.39424534638722736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,128,1,float16,fp8,0,0.13614400227864584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,128,1,fp8,fp8,0,0.1320480008920034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,0,1,fp8,fp8,0,0.23003733158111572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,float16,0,0.11160000165303548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,64,0,1,float16,fp8,0,0.253546675046285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,float16,0,0.2267413338025411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,128,1,float16,fp8,0,0.11327466368675232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,128,1,fp8,fp8,0,0.10723732908566792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,0,1,float16,fp8,0,0.22962133089701334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,float16,0,0.11377066373825073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,64,0,1,fp8,fp8,0,0.20400534073511759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,float16,0,0.2290133237838745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,128,1,float16,fp8,0,0.11488533020019531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,128,1,fp8,fp8,0,0.11079999804496765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,0,1,float16,fp8,0,0.2304960091908773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,float16,0,0.11603732903798421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,64,0,1,fp8,fp8,0,0.20723734299341837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,float16,0,0.23188267151514688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,128,1,float16,fp8,0,0.11774399876594543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,128,1,fp8,fp8,0,0.11710932850837708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,0,1,float16,fp8,0,0.2339893380800883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,float16,0,0.12275200088818868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,64,0,1,fp8,fp8,0,0.21228265762329102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,float16,0,0.23853333791097006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,128,1,float16,fp8,0,0.12392533818880717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,128,1,fp8,fp8,0,0.12286933263142903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,0,1,float16,fp8,0,0.2403093377749125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,float16,0,0.07739733159542084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,64,0,1,fp8,fp8,0,0.21976532538731894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,float16,0,0.14408000310262045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,128,1,float16,fp8,0,0.08040533463160197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,128,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,0,1,float16,fp8,0,0.14527466893196106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,64,0,1,fp8,fp8,0,0.13607999682426453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,float16,0,0.07231999933719635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,float16,0,0.13827199737230936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,128,1,float16,fp8,0,0.07161066432793935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,128,1,fp8,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,0,1,float16,fp8,0,0.1385599970817566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,float16,0,0.07129066685835521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,64,0,1,fp8,fp8,0,0.12378666798273723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,float16,0,0.13863999644915262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,128,1,float16,fp8,0,0.07234666744867961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,128,1,fp8,fp8,0,0.0688213308652242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,0,1,float16,fp8,0,0.14008532961209616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,float16,0,0.07261333366235097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,64,0,1,fp8,fp8,0,0.12411733468373616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,float16,0,0.14014933506647745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,128,1,float16,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,128,1,fp8,fp8,0,0.0705386648575465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,0,1,float16,fp8,0,0.1402133305867513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,float16,0,0.07461333274841309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,64,0,1,fp8,fp8,0,0.12598933776219687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,float16,0,0.14012799660364786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,128,1,float16,fp8,0,0.07533333202203114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,128,1,fp8,fp8,0,0.0729013333717982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,float16,0,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,0,1,float16,fp8,0,0.1420693298180898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,64,0,1,fp8,fp8,0,0.12946666280428568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,128,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,float16,0,0.10366400082906087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,128,1,fp8,fp8,0,0.056554665168126426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,0,1,float16,fp8,0,0.10341333349545796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,float16,0,0.05850133299827576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,64,0,1,fp8,fp8,0,0.09485866626103719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,float16,0,0.1035040020942688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,128,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,0,1,float16,fp8,0,0.10340799887975057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,64,0,1,fp8,fp8,0,0.09543466567993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,float16,0,0.06006933252016703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,128,1,float16,fp8,0,0.060415998101234436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,float16,0,0.10357333223025005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,128,1,fp8,fp8,0,0.056373332937558494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,0,1,fp8,fp8,0,0.0953493316968282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,float16,0,0.05806933343410492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,64,0,1,float16,fp8,0,0.10525866349538167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,128,1,float16,fp8,0,0.0582826683918635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,128,1,fp8,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,float16,0,0.10532266894976298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,float16,0,0.06027733286221822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,0,1,float16,fp8,0,0.10385599732398987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,64,0,1,fp8,fp8,0,0.09524800380071004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,float16,0,0.10545600454012553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,128,1,float16,fp8,0,0.060266668597857155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,128,1,fp8,fp8,0,0.05824000140031179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,0,1,float16,fp8,0,0.10364266236623128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,64,0,1,fp8,fp8,0,0.09523733456929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,float16,0,0.5884426832199097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,128,1,float16,fp8,0,0.5909333229064941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,128,1,fp8,fp8,0,0.5501226584116617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,float16,0,1.0025973320007324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,float16,0,0.5968480110168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,0,1,float16,fp8,0,1.0084319909413655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,64,0,1,fp8,fp8,0,0.8922932942708334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,128,1,float16,fp8,0,0.6016266743342081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,float16,0,1.012346665064494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,128,1,fp8,fp8,0,0.563594659169515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,0,1,float16,fp8,0,1.0173919995625813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,float16,0,0.6097973187764486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,64,0,1,fp8,fp8,0,0.9085813363393148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,float16,0,1.0284000237782795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,128,1,float16,fp8,0,0.614250659942627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,128,1,fp8,fp8,0,0.5762293338775635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,float16,0,0.6296799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,0,1,float16,fp8,0,1.033018668492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,64,0,1,fp8,fp8,0,0.9235626856486002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,float16,0,1.0506772994995117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,128,1,float16,fp8,0,0.6333706776301066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,128,1,fp8,fp8,0,0.6016213496526083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,float16,0,0.345301349957784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,0,1,float16,fp8,0,1.0572480360666912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,float16,0,0.5692746639251709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,128,1,float16,fp8,0,0.3525173266728719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,64,0,1,fp8,fp8,0,0.9494880040486654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,128,1,fp8,fp8,0,0.33724268277486164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,float16,0,0.30186667044957477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,0,1,float16,fp8,0,0.5765066544214884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,64,0,1,fp8,fp8,0,0.52074134349823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,float16,0,0.5142506758371989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,128,1,float16,fp8,0,0.30405867099761963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,128,1,fp8,fp8,0,0.28755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,0,1,float16,fp8,0,0.517029325167338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,float16,0,0.3057333429654439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,64,0,1,fp8,fp8,0,0.46479467550913495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,128,1,float16,fp8,0,0.30830933650334674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,float16,0,0.5193653504053751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,128,1,fp8,fp8,0,0.2919893264770508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,float16,0,0.31385600566864014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,0,1,float16,fp8,0,0.5224639972050985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,64,0,1,fp8,fp8,0,0.47099733352661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,128,1,float16,fp8,0,0.31648000081380206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,128,1,fp8,fp8,0,0.2997173269589742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,float16,0,0.5294453303019205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,float16,0,0.3232106765111287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,0,1,float16,fp8,0,0.531333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,64,0,1,fp8,fp8,0,0.4798933267593384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,128,1,float16,fp8,0,0.3279520074526469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,128,1,fp8,fp8,0,0.3102666735649109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,float16,0,0.5405546824137369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,float16,0,0.18385599056879678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,0,1,fp8,fp8,0,0.4907573461532593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,64,0,1,float16,fp8,0,0.5442773501078287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,float16,0,0.3020533323287964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,128,1,fp8,fp8,0,0.18332266807556152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,128,1,float16,fp8,0,0.18954666455586752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,float16,0,0.1584106683731079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,0,1,float16,fp8,0,0.3078986605008443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,64,0,1,fp8,fp8,0,0.2803093393643697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,128,1,float16,fp8,0,0.15948266784350076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,128,1,fp8,fp8,0,0.15465066830317178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,float16,0,0.2718186577161153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,float16,0,0.16074132919311523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,0,1,fp8,fp8,0,0.2490239938100179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,64,0,1,float16,fp8,0,0.2728586594263713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,128,1,float16,fp8,0,0.162581334511439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,128,1,fp8,fp8,0,0.15667200088500977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,float16,0,0.27347199122111004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,0,1,float16,fp8,0,0.27532800038655597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,64,0,1,fp8,fp8,0,0.2514773408571879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,float16,0,0.16486933827400208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,128,1,float16,fp8,0,0.16690667470296225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,128,1,fp8,fp8,0,0.16268799702326456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,fp8,0,0.28124799331029254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,0,1,float16,float16,0,0.2811093330383301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,float16,0,0.17309866348902384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,64,0,1,fp8,fp8,0,0.2568053404490153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,float16,0,0.2876960039138794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,128,1,float16,fp8,0,0.17670933405558267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,128,1,fp8,fp8,0,0.1688106656074524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,0,1,float16,fp8,0,0.28990934292475384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,float16,0,0.1032373309135437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,64,0,1,fp8,fp8,0,0.2653759916623433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,float16,0,0.16760534048080444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,128,1,float16,fp8,0,0.10538132985432942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,128,1,fp8,fp8,0,0.10412800312042236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,0,1,fp8,fp8,0,0.15811199943224588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,64,0,1,float16,fp8,0,0.17132800817489624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,float16,0,0.08915199836095174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,128,1,float16,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,128,1,fp8,fp8,0,0.08504000306129456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,float16,0,0.15260266264279684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,0,1,float16,fp8,0,0.15449066956837973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,float16,0,0.09019733468691508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,64,0,1,fp8,fp8,0,0.13829867045084634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,128,1,float16,fp8,0,0.09087999661763509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,float16,0,0.15254933635393778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,128,1,fp8,fp8,0,0.08507200082143147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,0,1,fp8,fp8,0,0.13803199927012125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,64,0,1,float16,fp8,0,0.15451199809710184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,float16,0,0.0912000040213267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,128,1,float16,fp8,0,0.09310400485992432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,128,1,fp8,fp8,0,0.08872532844543457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,float16,0,0.15461333592732748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,0,1,float16,fp8,0,0.15688533584276834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,float16,0,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,64,0,1,fp8,fp8,0,0.14225600163141885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,float16,0,0.15944533546765646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,128,1,float16,fp8,0,0.09541333715120952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,128,1,fp8,fp8,0,0.09507733583450317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,0,1,float16,fp8,0,0.15996266404787698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,float16,0,0.06154666841030121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,64,0,1,fp8,fp8,0,0.1499573290348053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,float16,0,0.09921600421269734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,128,1,float16,fp8,0,0.06291733185450236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,128,1,fp8,fp8,0,0.06100266675154368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,0,1,float16,fp8,0,0.10105599959691365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,float16,0,0.057258665561676025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,64,0,1,fp8,fp8,0,0.0950879951318105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,float16,0,0.09593600034713745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,128,1,float16,fp8,0,0.05772800246874491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,128,1,fp8,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,0,1,float16,fp8,0,0.09703999757766724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,64,0,1,fp8,fp8,0,0.08686400453249614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,float16,0,0.057616000374158226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,float16,0,0.09690133730570476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,128,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,128,1,fp8,fp8,0,0.05492266515890757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,0,1,float16,fp8,0,0.09729599952697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,64,0,1,fp8,fp8,0,0.08785600463549297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,float16,0,0.05789333085219065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,float16,0,0.097653329372406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,128,1,float16,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,128,1,fp8,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,0,1,float16,fp8,0,0.09804800152778625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,64,0,1,fp8,fp8,0,0.08825600147247314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,float16,0,0.05784533421198527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,float16,0,0.09816533327102661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,128,1,float16,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,128,1,fp8,fp8,0,0.05813866853713989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,0,1,fp8,fp8,0,0.0890773336092631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,float16,0,0.050000001986821495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,64,0,1,float16,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,128,1,float16,fp8,0,0.05022400120894114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,float16,0,0.07487466434637706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,128,1,fp8,fp8,0,0.04809066653251648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,0,1,fp8,fp8,0,0.06866666674613953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,float16,0,0.04997866849104563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,64,0,1,float16,fp8,0,0.07425066828727722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,128,1,float16,fp8,0,0.05027733246485392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,128,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,float16,0,0.07470400134722392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,float16,0,0.05061866839726766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,0,1,fp8,fp8,0,0.06853333115577698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,64,0,1,float16,fp8,0,0.07478933533032735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,float16,0,0.07441066702206929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,128,1,float16,fp8,0,0.05009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,128,1,fp8,fp8,0,0.04966933528582255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,0,1,fp8,fp8,0,0.06815466781457265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,float16,0,0.05057600140571594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,64,0,1,float16,fp8,0,0.07481066882610321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,128,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,float16,0,0.07460799813270569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,128,1,fp8,fp8,0,0.04794666667779287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,0,1,fp8,fp8,0,0.06874666611353557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,float16,0,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,64,0,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,float16,0,0.07464533547560374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,128,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,128,1,float16,fp8,0,0.05065066615740458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,0,1,float16,fp8,0,0.075013334552447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,64,0,1,fp8,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,float16,0,0.78603196144104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,128,1,float16,fp8,0,0.7822986443837484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,float16,0,1.1010026931762695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,128,1,fp8,fp8,0,0.7230026721954346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,0,1,float16,fp8,0,1.1012213230133057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,64,0,1,fp8,fp8,0,0.9832426706949869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,float16,0,0.7992213567097982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,128,1,fp8,fp8,0,0.7360693613688151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,128,1,float16,fp8,0,0.8020319938659668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,float16,0,1.1198453108469646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,0,1,float16,fp8,0,1.1196160316467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,64,0,1,fp8,fp8,0,0.9975093205769857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,float16,0,0.819152037302653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,float16,0,1.140229304631551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,128,1,float16,fp8,0,0.8170773188273112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,128,1,fp8,fp8,0,0.752837340037028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,0,1,float16,fp8,0,1.1414720217386882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,64,0,1,fp8,fp8,0,1.0158666769663494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,float16,0,0.8376479943593343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,float16,0,1.1660799980163574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,128,1,fp8,fp8,0,0.7846559683481852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,128,1,float16,fp8,0,0.8409226735432943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,0,1,float16,fp8,0,1.1688160101572673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,float16,0,0.4498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,float16,0,0.628661314646403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,64,0,1,fp8,fp8,0,1.0531840324401855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,128,1,float16,fp8,0,0.46107733249664307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,128,1,fp8,fp8,0,0.43940265973409015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,float16,0,0.3933440049489339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,0,1,fp8,fp8,0,0.5827840169270834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,64,0,1,float16,fp8,0,0.6359306573867798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,float16,0,0.5560053189595541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,128,1,float16,fp8,0,0.39607465267181396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,128,1,fp8,fp8,0,0.37081066767374676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,0,1,float16,fp8,0,0.5614453156789144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,64,0,1,fp8,fp8,0,0.5047680139541626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,float16,0,0.4016266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,float16,0,0.5622239907582601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,128,1,float16,fp8,0,0.4044693311055501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,128,1,fp8,fp8,0,0.37803733348846436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,0,1,float16,fp8,0,0.5680746634801229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,64,0,1,fp8,fp8,0,0.5127679904301962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,float16,0,0.40826133886973065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,float16,0,0.5731786489486694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,128,1,float16,fp8,0,0.4118560155232747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,128,1,fp8,fp8,0,0.3875253200531006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,0,1,float16,fp8,0,0.5785386562347412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,64,0,1,fp8,fp8,0,0.5212106704711914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,float16,0,0.42000532150268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,float16,0,0.5873386859893799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,128,1,float16,fp8,0,0.4253600041071574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,128,1,fp8,fp8,0,0.40040000279744464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,0,1,float16,fp8,0,0.5952959855397543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,float16,0,0.2366559902826945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,64,0,1,fp8,fp8,0,0.5391146739323934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,float16,0,0.32652799288431805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,128,1,float16,fp8,0,0.24126933018366495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,128,1,fp8,fp8,0,0.2321066657702128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,0,1,float16,fp8,0,0.3327146569887797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,float16,0,0.2031573255856832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,64,0,1,fp8,fp8,0,0.3064639965693156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,float16,0,0.28756799300511676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,128,1,float16,fp8,0,0.20590933163960776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,128,1,fp8,fp8,0,0.19616534312566122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,0,1,float16,fp8,0,0.2916693290074666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,float16,0,0.20570667584737143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,64,0,1,fp8,fp8,0,0.26768000920613605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,float16,0,0.29178667068481445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,128,1,fp8,fp8,0,0.19954667488733926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,128,1,float16,fp8,0,0.2081600030263265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,0,1,float16,fp8,0,0.29340267181396484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,float16,0,0.2130133310953776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,64,0,1,fp8,fp8,0,0.27035733064015705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,float16,0,0.2982613245646159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,128,1,float16,fp8,0,0.2145599921544393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,128,1,fp8,fp8,0,0.20579200983047485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,0,1,float16,fp8,0,0.3015093406041463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,float16,0,0.2207733392715454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,64,0,1,fp8,fp8,0,0.2768266598383586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,128,1,float16,fp8,0,0.22346667448679605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,128,1,fp8,fp8,0,0.21416000525156656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,float16,0,0.30721600850423175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,float16,0,0.12638933459917703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,0,1,fp8,fp8,0,0.28574933608373004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,64,0,1,float16,fp8,0,0.31177600224812824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,float16,0,0.17851734161376953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,128,1,float16,fp8,0,0.1304639975229899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,128,1,fp8,fp8,0,0.12618666887283325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,0,1,float16,fp8,0,0.18131732940673828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,float16,0,0.1055626670519511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,64,0,1,fp8,fp8,0,0.16933866341908774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,float16,0,0.15477333466211954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,128,1,float16,fp8,0,0.10788266857465108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,128,1,fp8,fp8,0,0.10256000359853108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,0,1,float16,fp8,0,0.1558026671409607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,float16,0,0.10744000474611919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,64,0,1,fp8,fp8,0,0.14275733629862467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,float16,0,0.15452800194422403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,128,1,float16,fp8,0,0.10945066809654236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,128,1,fp8,fp8,0,0.10557867089907329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,0,1,float16,fp8,0,0.1564906636873881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,64,0,1,fp8,fp8,0,0.14573867122332254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,float16,0,0.11036800344785054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,float16,0,0.15812266866366068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,128,1,float16,fp8,0,0.11145066221555074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,128,1,fp8,fp8,0,0.11054399609565735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,0,1,float16,fp8,0,0.1607200006643931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,64,0,1,fp8,fp8,0,0.15058666467666626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,float16,0,0.11642133196194966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,float16,0,0.1646399994691213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,128,1,float16,fp8,0,0.11918933192888896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,128,1,fp8,fp8,0,0.11752532919247945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,0,1,float16,fp8,0,0.16724799076716104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,float16,0,0.07213866710662842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,64,0,1,fp8,fp8,0,0.15683199961980185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,float16,0,0.09925333658854167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,128,1,float16,fp8,0,0.07460799813270569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,128,1,fp8,fp8,0,0.07452266911665599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,0,1,float16,fp8,0,0.10136000315348308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,64,0,1,fp8,fp8,0,0.09551999966303508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,float16,0,0.06648533542950948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,float16,0,0.09107733766237895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,128,1,float16,fp8,0,0.06636266907056172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,128,1,fp8,fp8,0,0.06257600088914235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,0,1,float16,fp8,0,0.09331199526786804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,64,0,1,fp8,fp8,0,0.08338133494059245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,float16,0,0.06665599842866261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,float16,0,0.09300266702969869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,128,1,float16,fp8,0,0.06657599906126659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,128,1,fp8,fp8,0,0.06359999875227611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,0,1,fp8,fp8,0,0.08521067102750142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,64,0,1,float16,fp8,0,0.0932426651318868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,float16,0,0.06689066688219707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,float16,0,0.09304533402125041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,128,1,float16,fp8,0,0.06875200072924297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,128,1,fp8,fp8,0,0.06432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,0,1,float16,fp8,0,0.09446932872136433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,64,0,1,fp8,fp8,0,0.084906667470932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,float16,0,0.06689066688219707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,float16,0,0.09506666660308838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,128,1,float16,fp8,0,0.06858133276303609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,128,1,fp8,fp8,0,0.06646933158238728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,0,1,float16,fp8,0,0.09702400366465251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,64,0,1,fp8,fp8,0,0.08921066919962566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,float16,0,0.04567466676235199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,float16,0,0.06437333424886067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,128,1,float16,fp8,0,0.04780800143877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,128,1,fp8,fp8,0,0.04558933277924856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,0,1,fp8,fp8,0,0.06042666733264923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,float16,0,0.0439626673857371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,64,0,1,float16,fp8,0,0.06648000081380208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,float16,0,0.06243200103441874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,128,1,float16,fp8,0,0.04399999976158142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,128,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,0,1,float16,fp8,0,0.06364800035953522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,64,0,1,fp8,fp8,0,0.056517332792282104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,float16,0,0.043807998299598694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,float16,0,0.06216000020503998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,128,1,float16,fp8,0,0.04418133199214935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,128,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,0,1,float16,fp8,0,0.0626453310251236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,64,0,1,fp8,fp8,0,0.05842666824658712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,float16,0,0.04225599765777588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,float16,0,0.06242666641871134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,128,1,float16,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,0,1,float16,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,128,1,fp8,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,64,0,1,fp8,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,float16,0,0.04387199878692627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,float16,0,0.06445333361625671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,128,1,float16,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,128,1,fp8,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,0,1,float16,fp8,0,0.06426133215427399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,64,0,1,fp8,fp8,0,0.06048533320426941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,float16,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,float16,0,0.04975999891757965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,128,1,float16,fp8,0,0.036602665980656944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,128,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,0,1,float16,fp8,0,0.048309331138928734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,float16,0,0.03610666592915853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,128,1,float16,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,128,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,0,1,float16,fp8,0,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,float16,0,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,64,0,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,float16,0,0.04966400067011515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,128,1,float16,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,128,1,fp8,fp8,0,0.03533866753180822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,0,1,float16,fp8,0,0.04814933240413666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,64,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,float16,0,0.03606399893760681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,float16,0,0.047728002071380615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,128,1,float16,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,0,1,float16,fp8,0,0.047770669062932335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,64,0,1,fp8,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,float16,0,0.03602666656176249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,float16,0,0.048154667019844055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,128,1,float16,fp8,0,0.03640000025431315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,128,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,0,1,float16,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,float16,0,0.6052053372065226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,128,1,float16,fp8,0,0.605679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,float16,0,0.7585546970367432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,128,1,fp8,fp8,0,0.5635786851247152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,0,1,float16,fp8,0,0.7605493068695068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,64,0,1,fp8,fp8,0,0.6890239715576172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,float16,0,0.6260426839192709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,128,1,float16,fp8,0,0.6265973250071207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,float16,0,0.7811893622080485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,128,1,fp8,fp8,0,0.5745706558227539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,float16,0,0.6387573480606079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,0,1,fp8,fp8,0,0.697920004526774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,64,0,1,float16,fp8,0,0.7827413082122803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,float16,0,0.7977973620096842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,128,1,float16,fp8,0,0.637392004330953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,128,1,fp8,fp8,0,0.5866186618804932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,0,1,float16,fp8,0,0.7987733681996664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,64,0,1,fp8,fp8,0,0.7144107023874918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,float16,0,0.6618826786677042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,float16,0,0.8235893249511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,128,1,float16,fp8,0,0.6527680158615112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,128,1,fp8,fp8,0,0.6073386669158936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,float16,0,0.3543200095494588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,0,1,float16,fp8,0,0.8144533634185791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,64,0,1,fp8,fp8,0,0.7369173367818197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,float16,0,0.44236799081166583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,128,1,float16,fp8,0,0.3571733236312866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,128,1,fp8,fp8,0,0.33952534198760986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,0,1,float16,fp8,0,0.4442506631215413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,float16,0,0.3044106761614482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,64,0,1,fp8,fp8,0,0.41233599185943604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,float16,0,0.3826560179392497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,128,1,float16,fp8,0,0.30561065673828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,128,1,fp8,fp8,0,0.288154661655426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,0,1,float16,fp8,0,0.3859306573867798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,64,0,1,fp8,fp8,0,0.35285333792368573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,float16,0,0.3081279993057251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,float16,0,0.3895786603291829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,128,1,float16,fp8,0,0.31115732590357464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,128,1,fp8,fp8,0,0.29388266801834106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,0,1,float16,fp8,0,0.3908853530883789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,64,0,1,fp8,fp8,0,0.35950934886932373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,float16,0,0.317029337088267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,float16,0,0.3993493318557739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,128,1,float16,fp8,0,0.3200266758600871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,128,1,fp8,fp8,0,0.3021706740061442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,0,1,fp8,fp8,0,0.36765865484873456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,64,0,1,float16,fp8,0,0.40201600392659503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,float16,0,0.3266826669375102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,128,1,float16,fp8,0,0.3304479916890462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,float16,0,0.4123680194218953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,128,1,fp8,fp8,0,0.31169599294662476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,0,1,float16,fp8,0,0.41367467244466144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,float16,0,0.18410134315490723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,64,0,1,fp8,fp8,0,0.3771680196126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,float16,0,0.23210134108861288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,128,1,float16,fp8,0,0.18690667549769083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,128,1,fp8,fp8,0,0.18010133504867554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,0,1,float16,fp8,0,0.23421865701675415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,64,0,1,fp8,fp8,0,0.21768534183502197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,float16,0,0.15250666936238608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,float16,0,0.19578667481740317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,128,1,float16,fp8,0,0.15575466553370157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,128,1,fp8,fp8,0,0.1521813372770945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,0,1,float16,fp8,0,0.19825599590937296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,64,0,1,fp8,fp8,0,0.18757865826288858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,float16,0,0.15647466977437338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,float16,0,0.19873066743214926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,128,1,fp8,fp8,0,0.15441067020098367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,128,1,float16,fp8,0,0.15874666968981424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,0,1,float16,fp8,0,0.20164799690246582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,64,0,1,fp8,fp8,0,0.18888533115386963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,float16,0,0.16314133008321127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,float16,0,0.2058080037434896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,128,1,float16,fp8,0,0.16423466801643372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,128,1,fp8,fp8,0,0.1609653333822886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,0,1,float16,fp8,0,0.20753600200017294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,64,0,1,fp8,fp8,0,0.19548799594243368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,float16,0,0.17081600427627563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,128,1,float16,fp8,0,0.17292267084121704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,128,1,fp8,fp8,0,0.1655893325805664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,float16,0,0.21597866217295328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,float16,0,0.09898666540781657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,0,1,float16,fp8,0,0.21802133321762085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,64,0,1,fp8,fp8,0,0.20198400815327963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,128,1,float16,fp8,0,0.1016533374786377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,float16,0,0.12435733278592427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,128,1,fp8,fp8,0,0.10120532910029094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,0,1,float16,fp8,0,0.1260426640510559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,64,0,1,fp8,fp8,0,0.12191999951998393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,float16,0,0.08498133222262065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,128,1,float16,fp8,0,0.08646399776140849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,float16,0,0.10813867052396138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,128,1,fp8,fp8,0,0.08088000118732452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,0,1,fp8,fp8,0,0.09947199622790019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,64,0,1,float16,fp8,0,0.10951466361681621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,float16,0,0.08476799726486206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,128,1,float16,fp8,0,0.08648533622423808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,float16,0,0.10841066638628642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,128,1,fp8,fp8,0,0.08052266637484233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,0,1,float16,fp8,0,0.11003733674685161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,64,0,1,fp8,fp8,0,0.1014453371365865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,float16,0,0.08666132887204488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,128,1,float16,fp8,0,0.08923733234405518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,float16,0,0.10971732934315999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,128,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,0,1,float16,fp8,0,0.11364266276359558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,64,0,1,fp8,fp8,0,0.10417600472768147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,float16,0,0.09063466389973958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,128,1,float16,fp8,0,0.09304533402125041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,float16,0,0.11506666739781697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,128,1,fp8,fp8,0,0.09292266766230266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,0,1,fp8,fp8,0,0.11148800452550252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,64,0,1,float16,fp8,0,0.11716266473134358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,float16,0,0.05587733288606008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,128,1,float16,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,128,1,fp8,fp8,0,0.05686399837334951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,float16,0,0.07346666852633159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,0,1,float16,fp8,0,0.07443200051784515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,64,0,1,fp8,fp8,0,0.0702453354994456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,float16,0,0.05172266562779745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,float16,0,0.06842666864395142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,128,1,float16,fp8,0,0.05222400029500326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,128,1,fp8,fp8,0,0.05034666756788889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,0,1,float16,fp8,0,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,64,0,1,fp8,fp8,0,0.0635040005048116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,float16,0,0.05220800141493479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,float16,0,0.06817600131034851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,128,1,float16,fp8,0,0.052704001466433205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,128,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,0,1,float16,fp8,0,0.07026666899522145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,64,0,1,fp8,fp8,0,0.06257600088914235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,float16,0,0.051872000098228455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,float16,0,0.06871999800205231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,128,1,float16,fp8,0,0.054042667150497437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,128,1,fp8,fp8,0,0.05207466582457224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,0,1,float16,fp8,0,0.07053333520889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,64,0,1,fp8,fp8,0,0.06406933565934499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,float16,0,0.0540533314148585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,float16,0,0.06994133194287618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,128,1,float16,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,128,1,fp8,fp8,0,0.05459733307361603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,0,1,float16,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,64,0,1,fp8,fp8,0,0.06639466683069865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,float16,0,0.05026133358478546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,128,1,fp8,fp8,0,0.039461334546407066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,0,1,float16,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,64,0,1,fp8,fp8,0,0.04779199759165446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,float16,0,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,128,1,float16,fp8,0,0.03934400031963984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,128,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,0,1,float16,fp8,0,0.04818133513132731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,64,0,1,fp8,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,float16,0,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,128,1,float16,fp8,0,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,float16,0,0.04804266492525736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,128,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,0,1,float16,fp8,0,0.047968000173568726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,64,0,1,fp8,fp8,0,0.04373333354791006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,float16,0,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,float16,0,0.047925333182017006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,128,1,fp8,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,0,1,float16,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,64,0,1,fp8,fp8,0,0.04404800136884054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,float16,0,0.048623998959859215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,float16,0,0.03929600119590759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,128,1,float16,fp8,0,0.0395413339138031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,128,1,fp8,fp8,0,0.038047999143600464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,0,1,float16,fp8,0,0.04842666784922282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,64,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,128,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,128,1,fp8,fp8,0,0.03014400104681651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,0,1,float16,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,64,0,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,float16,0,0.03997866561015447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,128,1,float16,fp8,0,0.031925333042939506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,128,1,fp8,fp8,0,0.03137599925200144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,0,1,float16,fp8,0,0.040991999208927155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,float16,0,0.03156266609827677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,64,0,1,fp8,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,128,1,float16,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,128,1,fp8,fp8,0,0.029765332738558452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,0,1,float16,fp8,0,0.039962666730086006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,64,0,1,fp8,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,float16,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,float16,0,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,128,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,128,1,fp8,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,64,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,float16,0,0.031888000667095184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,float16,0,0.040218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,128,1,float16,fp8,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,128,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,0,1,float16,fp8,0,0.040805332362651825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,64,0,1,fp8,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,float16,0,0.702298641204834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,128,1,float16,fp8,0,0.6954240004221598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,float16,0,0.7964266935984293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,128,1,fp8,fp8,0,0.6610453526178995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,0,1,float16,fp8,0,0.7915999889373779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,64,0,1,fp8,fp8,0,0.7292959690093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,float16,0,0.7091999848683676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,float16,0,0.8030292987823486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,128,1,float16,fp8,0,0.7029066880544027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,0,1,float16,fp8,0,0.8019999663035074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,128,1,fp8,fp8,0,0.6598399877548218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,64,0,1,fp8,fp8,0,0.7310293515523275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,float16,0,0.7209440072377523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,128,1,float16,fp8,0,0.7185333569844564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,128,1,fp8,fp8,0,0.767568031946818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,float16,0,0.8228853543599447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,0,1,float16,fp8,0,0.8184853394826254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,float16,0,0.7027040322621664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,64,0,1,fp8,fp8,0,0.8446293671925863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,128,1,float16,fp8,0,0.696394681930542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,float16,0,0.8089439868927002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,float16,0,0.37371734778086346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,128,1,fp8,fp8,0,0.7538666725158691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,0,1,float16,fp8,0,0.7978933652242025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,64,0,1,fp8,fp8,0,0.8342986901601156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,float16,0,0.4302826722462972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,128,1,float16,fp8,0,0.36604801813761395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,128,1,fp8,fp8,0,0.3779733180999756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,0,1,float16,fp8,0,0.4252053499221802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,64,0,1,fp8,fp8,0,0.423855980237325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,float16,0,0.3582133452097575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,float16,0,0.40931200981140137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,128,1,float16,fp8,0,0.3569173415501912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,128,1,fp8,fp8,0,0.3351999918619792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,0,1,float16,fp8,0,0.40587735176086426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,64,0,1,fp8,fp8,0,0.3731520175933838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,float16,0,0.3619573513666789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,float16,0,0.4124853213628133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,128,1,float16,fp8,0,0.35976000626881915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,128,1,fp8,fp8,0,0.3385173479715983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,0,1,float16,fp8,0,0.4108853340148926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,64,0,1,fp8,fp8,0,0.3766719897588094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,float16,0,0.3696959813435872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,float16,0,0.42069868246714276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,128,1,float16,fp8,0,0.36921600500742596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,128,1,fp8,fp8,0,0.3734293381373088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,0,1,float16,fp8,0,0.4193013509114583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,64,0,1,fp8,fp8,0,0.413263996442159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,float16,0,0.36128000418345135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,128,1,float16,fp8,0,0.3577386538187663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,float16,0,0.4145333369572957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,128,1,fp8,fp8,0,0.3692266543706258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,0,1,float16,fp8,0,0.4111040035883586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,64,0,1,fp8,fp8,0,0.4078720013300578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,float16,0,0.19776533047358194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,float16,0,0.2268213431040446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,128,1,float16,fp8,0,0.19437867403030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,128,1,fp8,fp8,0,0.19938133160273233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,0,1,float16,fp8,0,0.22409067551294962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,64,0,1,fp8,fp8,0,0.2222986618677775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,float16,0,0.1888213356335958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,float16,0,0.21430933475494385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,128,1,float16,fp8,0,0.1874613364537557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,128,1,fp8,fp8,0,0.17731199661890665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,0,1,float16,fp8,0,0.21248000860214233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,64,0,1,fp8,fp8,0,0.1958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,float16,0,0.1912320057551066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,float16,0,0.21660266319910684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,128,1,fp8,fp8,0,0.17933332920074463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,128,1,float16,fp8,0,0.1904266675313314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,0,1,float16,fp8,0,0.21638933817545572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,64,0,1,fp8,fp8,0,0.19832533597946167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,float16,0,0.19402666886647543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,float16,0,0.22025599082310995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,128,1,float16,fp8,0,0.19362133741378784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,128,1,fp8,fp8,0,0.19040000438690186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,0,1,float16,fp8,0,0.22001065810521445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,float16,0,0.18952532609303793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,64,0,1,fp8,fp8,0,0.21171732743581137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,float16,0,0.21849066019058228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,128,1,float16,fp8,0,0.18782933553059897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,128,1,fp8,fp8,0,0.18868800004323324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,float16,0,0.11101866761843364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,0,1,float16,fp8,0,0.21733866135279337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,64,0,1,fp8,fp8,0,0.20936532815297446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,float16,0,0.1276533305644989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,128,1,float16,fp8,0,0.10801066954930623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,128,1,fp8,fp8,0,0.109333336353302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,0,1,float16,fp8,0,0.12525866429011026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,64,0,1,fp8,fp8,0,0.12286399801572163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,float16,0,0.10187199711799622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,128,1,float16,fp8,0,0.09985599915186565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,float16,0,0.11591999729474385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,128,1,fp8,fp8,0,0.09540800253550212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,0,1,float16,fp8,0,0.11562666296958923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,64,0,1,fp8,fp8,0,0.10805333654085796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,float16,0,0.10342400272687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,float16,0,0.11753599842389424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,128,1,float16,fp8,0,0.10219732920328777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,128,1,fp8,fp8,0,0.09762666622797649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,0,1,float16,fp8,0,0.11710932850837708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,64,0,1,fp8,fp8,0,0.10979732871055603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,float16,0,0.11978666981061299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,float16,0,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,128,1,float16,fp8,0,0.10528533657391866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,128,1,fp8,fp8,0,0.10402133067448933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,0,1,float16,fp8,0,0.12007466952006023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,64,0,1,fp8,fp8,0,0.1158026655515035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,float16,0,0.10521066188812256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,float16,0,0.11983999609947205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,128,1,float16,fp8,0,0.10389332969983418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,0,1,float16,fp8,0,0.11965333422025044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,128,1,fp8,fp8,0,0.10531199971834819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,64,0,1,fp8,fp8,0,0.11513599753379822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,float16,0,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,float16,0,0.0691840002934138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,128,1,float16,fp8,0,0.06037333110968272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,128,1,fp8,fp8,0,0.06410133341948192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,0,1,float16,fp8,0,0.06888533135255177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,64,0,1,fp8,fp8,0,0.07011733452479045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,float16,0,0.05824000140031179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,float16,0,0.06646400193373363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,128,1,float16,fp8,0,0.05858666698137919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,128,1,fp8,fp8,0,0.05560533205668131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,0,1,float16,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,64,0,1,fp8,fp8,0,0.06275199850400288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,float16,0,0.05843733251094818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,float16,0,0.06874666611353557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,128,1,fp8,fp8,0,0.0561706672112147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,128,1,float16,fp8,0,0.05874133110046387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,0,1,float16,fp8,0,0.06868266562620799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,64,0,1,fp8,fp8,0,0.06404800216356914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,float16,0,0.06051200131575266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,float16,0,0.0699946681658427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,128,1,float16,fp8,0,0.060602664947509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,128,1,fp8,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,0,1,float16,fp8,0,0.06878399848937988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,float16,0,0.05914666752020518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,64,0,1,fp8,fp8,0,0.06609066824118297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,float16,0,0.06691200037797292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,128,1,float16,fp8,0,0.0580213318268458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,128,1,fp8,fp8,0,0.059952000776926674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,0,1,fp8,fp8,0,0.06574933230876923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,64,0,1,float16,fp8,0,0.06620266536871593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,float16,0,0.03786666691303253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,128,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,128,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,float16,0,0.04568000137805939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,0,1,float16,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,64,0,1,fp8,fp8,0,0.0440533310174942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,float16,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,float16,0,0.04552533229192098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,128,1,float16,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,128,1,fp8,fp8,0,0.03547733277082443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,0,1,float16,fp8,0,0.044154668847719826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,64,0,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,float16,0,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,128,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,128,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,float16,0,0.04424533247947693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,0,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,64,0,1,fp8,fp8,0,0.04330666859944662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,float16,0,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,128,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,128,1,fp8,fp8,0,0.038202665746212006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,0,1,float16,fp8,0,0.04623466730117798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,64,0,1,fp8,fp8,0,0.04294399917125702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,float16,0,0.03796799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,float16,0,0.04598399996757507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,128,1,float16,fp8,0,0.04052799940109253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,128,1,fp8,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,0,1,float16,fp8,0,0.04445866743723551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,64,0,1,fp8,fp8,0,0.04398400088151296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,128,1,float16,fp8,0,0.026389333109060924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,128,1,fp8,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,0,1,float16,fp8,0,0.031311998764673867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,64,0,1,fp8,fp8,0,0.02917333443959554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,float16,0,0.02979733298222224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,128,1,fp8,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,64,0,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,float16,0,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,128,1,fp8,fp8,0,0.02498133232196172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,64,0,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,float16,0,0.030970667799313862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,128,1,float16,fp8,0,0.02589333305756251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,128,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,0,1,float16,fp8,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,64,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,float16,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,128,1,fp8,fp8,0,0.026373334228992462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,0,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,64,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,128,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,64,0,1,fp8,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,128,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,128,1,fp8,fp8,0,0.020768000433842342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,0,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,64,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,float16,0,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,128,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,128,1,float16,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,64,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,128,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,0,1,float16,fp8,0,0.02700799951950709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,64,0,1,fp8,fp8,0,0.02479466547568639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,float16,0,0.02619733413060506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,128,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,128,1,fp8,fp8,0,0.02149333308140437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,64,0,1,fp8,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,128,1,float16,float16,0,0.6856426397959391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,0,1,float16,float16,0,0.6819360256195068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,128,1,float16,fp8,0,0.6762186686197916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,0,1,fp8,fp8,0,0.6271040042241415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,0,1,float16,fp8,0,0.6797333558400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,64,128,1,fp8,fp8,0,0.6336906750996908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,128,1,float16,float16,0,0.6881333192189535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,0,1,float16,float16,0,0.6897706985473633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,128,1,float16,fp8,0,0.6856479644775391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,0,1,float16,fp8,0,0.6833759943644205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,128,1,fp8,fp8,0,0.6421120166778564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,64,0,1,fp8,fp8,0,0.6353280146916708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,128,1,float16,float16,0,0.7046026388804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,0,1,float16,float16,0,0.7057387034098307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,128,1,float16,fp8,0,0.704474687576294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,128,1,fp8,fp8,0,0.7510879834493002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,0,1,float16,fp8,0,0.7018667062123617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,64,0,1,fp8,fp8,0,0.7468960285186768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,128,1,float16,float16,0,0.6851360003153483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,0,1,float16,float16,0,0.6903466383616129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,128,1,float16,fp8,0,0.6770133177439371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,128,1,fp8,fp8,0,0.7351360321044922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,0,1,float16,fp8,0,0.6812319755554199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,64,0,1,fp8,fp8,0,0.7357386747996012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,128,1,float16,float16,0,0.36374398072560626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,0,1,float16,float16,0,0.36653868357340497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,128,1,float16,fp8,0,0.35579200585683185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,128,1,fp8,fp8,0,0.36817065874735516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,0,1,float16,fp8,0,0.3598293463389079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,128,1,float16,float16,0,0.34938132762908936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,64,0,1,fp8,fp8,0,0.36879467964172363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,0,1,float16,float16,0,0.3492106596628825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,128,1,float16,fp8,0,0.34718934694925946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,128,1,fp8,fp8,0,0.32549866040547687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,0,1,float16,fp8,0,0.34699734052022296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,64,0,1,fp8,fp8,0,0.3221386671066284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,128,1,float16,float16,0,0.3548159996668498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,0,1,float16,float16,0,0.35416531562805176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,128,1,float16,fp8,0,0.35129066308339435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,128,1,fp8,fp8,0,0.33057065804799396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,0,1,float16,fp8,0,0.3532319863637288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,64,0,1,fp8,fp8,0,0.32665600379308063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,0,1,float16,float16,0,0.36213334401448566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,128,1,float16,fp8,0,0.3599840005238851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,128,1,float16,float16,0,0.3596373399098714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,128,1,fp8,fp8,0,0.36582934856414795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,0,1,float16,fp8,0,0.35994664827982586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,64,0,1,fp8,fp8,0,0.3617066542307536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,128,1,float16,float16,0,0.350655992825826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,0,1,float16,float16,0,0.35344000657399494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,128,1,float16,fp8,0,0.3472213347752889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,128,1,fp8,fp8,0,0.3622080087661743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,0,1,float16,fp8,0,0.3503093322118123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,128,1,float16,float16,0,0.1912213365236918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,64,0,1,fp8,fp8,0,0.35441601276397705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,0,1,float16,float16,0,0.19343467553456625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,128,1,float16,fp8,0,0.18986133734385172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,128,1,fp8,fp8,0,0.193231999874115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,0,1,float16,fp8,0,0.19107200702031454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,64,0,1,fp8,fp8,0,0.19409600893656412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,128,1,float16,float16,0,0.18318933248519897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,0,1,float16,float16,0,0.18297600746154785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,128,1,float16,fp8,0,0.1818293333053589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,128,1,fp8,fp8,0,0.17147733767827353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,0,1,float16,fp8,0,0.1818293333053589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,64,0,1,fp8,fp8,0,0.17094933986663818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,128,1,float16,float16,0,0.1859040061632792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,0,1,float16,float16,0,0.18577067057291666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,128,1,float16,fp8,0,0.18315200010935465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,128,1,fp8,fp8,0,0.1751733422279358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,0,1,float16,fp8,0,0.18526933590571085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,64,0,1,fp8,fp8,0,0.17306133111317953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,128,1,float16,float16,0,0.18920000394185385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,0,1,float16,float16,0,0.18955733378728232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,128,1,float16,fp8,0,0.18863999843597412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,128,1,fp8,fp8,0,0.1851466695467631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,0,1,float16,fp8,0,0.18926932414372763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,64,0,1,fp8,fp8,0,0.18437333901723227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,128,1,float16,float16,0,0.18531733751296997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,0,1,float16,float16,0,0.18702934185663858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,128,1,float16,fp8,0,0.1835626761118571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,128,1,fp8,fp8,0,0.1842026710510254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,0,1,float16,fp8,0,0.18363734086354574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,128,1,float16,float16,0,0.1055466632048289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,64,0,1,fp8,fp8,0,0.18403732776641846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,0,1,float16,float16,0,0.1090880036354065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,128,1,float16,fp8,0,0.10457066694895427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,128,1,fp8,fp8,0,0.10764799515406291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,0,1,float16,fp8,0,0.10724799831708272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,64,0,1,fp8,fp8,0,0.10853333274523418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,128,1,float16,float16,0,0.0993386705716451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,0,1,float16,float16,0,0.09934932986895244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,128,1,float16,fp8,0,0.09942400455474854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,0,1,float16,fp8,0,0.09916266798973083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,128,1,fp8,fp8,0,0.09443733096122742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,64,0,1,fp8,fp8,0,0.09290666381518047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,128,1,float16,float16,0,0.10085333387056987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,0,1,float16,float16,0,0.10027199983596802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,128,1,float16,fp8,0,0.0999840001265208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,128,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,0,1,float16,fp8,0,0.09962667028109233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,64,0,1,fp8,fp8,0,0.09454933802286784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,0,1,float16,float16,0,0.10365333159764607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,128,1,float16,float16,0,0.10331733028093974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,128,1,float16,fp8,0,0.10300800204277039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,128,1,fp8,fp8,0,0.10153067111968994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,0,1,fp8,fp8,0,0.10126399993896484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,64,0,1,float16,fp8,0,0.10387733578681946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,128,1,float16,float16,0,0.10140800476074219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,0,1,float16,float16,0,0.10319999853769939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,128,1,float16,fp8,0,0.10057066877683003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,128,1,fp8,fp8,0,0.10281067093213399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,0,1,float16,fp8,0,0.10218666990598042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,64,0,1,fp8,fp8,0,0.10249599814414978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,128,1,float16,float16,0,0.05909866591294607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,0,1,float16,float16,0,0.05835199852784475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,128,1,float16,fp8,0,0.058415999015172325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,0,1,float16,fp8,0,0.05762133498986562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,128,1,fp8,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,64,0,1,fp8,fp8,0,0.060991997520128884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,128,1,float16,float16,0,0.05651199817657471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,0,1,float16,float16,0,0.05781333148479462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,128,1,float16,fp8,0,0.05784533421198527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,128,1,fp8,fp8,0,0.05407999952634176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,0,1,float16,fp8,0,0.057162667314211525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,64,0,1,fp8,fp8,0,0.05401599903901418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,128,1,float16,float16,0,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,0,1,float16,float16,0,0.05629866818586985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,128,1,float16,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,128,1,fp8,fp8,0,0.05609600245952606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,0,1,float16,fp8,0,0.05648000041643778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,64,0,1,fp8,fp8,0,0.05425066749254862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,128,1,float16,float16,0,0.058133333921432495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,0,1,float16,float16,0,0.05855466425418854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,128,1,float16,fp8,0,0.05809600154558817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,128,1,fp8,fp8,0,0.056794668237368263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,0,1,float16,fp8,0,0.058677335580190025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,128,1,float16,float16,0,0.05806933343410492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,64,0,1,fp8,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,0,1,float16,float16,0,0.0563679983218511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,128,1,float16,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,128,1,fp8,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,64,0,1,fp8,fp8,0,0.0576800008614858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,128,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,0,1,float16,float16,0,0.037952000896135964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,128,1,float16,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,128,1,fp8,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,0,1,float16,fp8,0,0.038218667109807335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,64,0,1,fp8,fp8,0,0.03788266579310099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,128,1,float16,float16,0,0.03793599953254064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,0,1,float16,float16,0,0.0384853333234787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,128,1,float16,fp8,0,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,128,1,fp8,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,0,1,float16,fp8,0,0.03850133220354716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,64,0,1,fp8,fp8,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,128,1,float16,float16,0,0.037978666524092354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,0,1,float16,float16,0,0.037632000943024956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,128,1,float16,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,128,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,64,0,1,fp8,fp8,0,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,128,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,128,1,float16,fp8,0,0.03941333293914795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,0,1,float16,float16,0,0.039813332259655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,128,1,fp8,fp8,0,0.03801066676775614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,0,1,float16,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,64,0,1,fp8,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,0,1,float16,float16,0,0.039450667798519135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,128,1,float16,fp8,0,0.03827733298142751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,128,1,float16,float16,0,0.03770666569471359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,0,1,float16,fp8,0,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,128,1,fp8,fp8,0,0.038005332152048744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,64,0,1,fp8,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,128,1,float16,float16,0,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,128,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,128,1,fp8,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,0,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,64,0,1,fp8,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,0,1,float16,float16,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,128,1,float16,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,128,1,fp8,fp8,0,0.02478933334350586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,0,1,float16,fp8,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,64,0,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,128,1,float16,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,128,1,fp8,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,128,1,float16,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,128,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,0,1,float16,fp8,0,0.02588266630967458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,64,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,128,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,0,1,float16,float16,0,0.026543999711672466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,128,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,128,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,0,1,float16,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,128,1,float16,float16,0,0.022015998760859173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,64,0,1,fp8,fp8,0,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,128,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,128,1,float16,float16,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,0,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,64,0,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,128,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,128,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,64,0,1,float16,fp8,0,0.02182399978240331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,128,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,0,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,128,1,float16,fp8,0,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,128,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,128,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,64,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,0,1,float16,float16,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,128,1,float16,fp8,0,0.02183466653029124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,128,1,fp8,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,64,0,1,fp8,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,128,1,float16,float16,0,0.021744000415007275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,128,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,0,1,float16,fp8,0,0.02176533391078313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,128,1,fp8,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,64,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,128,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,128,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,0,1,float16,float16,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,64,0,1,fp8,fp8,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,128,1,float16,float16,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,128,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,128,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,0,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,128,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,0,1,float16,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,64,0,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,128,1,float16,float16,0,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,128,1,float16,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,128,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,0,1,float16,float16,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,128,1,fp8,fp8,0,0.020128000527620316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,0,1,float16,fp8,0,0.020351999749739964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,64,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,128,1,float16,float16,0,0.3214613397916158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,0,1,float16,float16,0,0.31542932987213135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,128,1,float16,fp8,0,0.31922133763631183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,128,1,fp8,fp8,0,0.3004213372866313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,0,1,float16,fp8,0,0.31170666217803955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,64,0,1,fp8,fp8,0,0.2928053339322408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,128,1,float16,float16,0,0.32894400755564374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,0,1,float16,float16,0,0.32156266768773395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,128,1,float16,fp8,0,0.3264159957567851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,128,1,fp8,fp8,0,0.3060373266537984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,0,1,float16,fp8,0,0.31861333052317303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,64,0,1,fp8,fp8,0,0.2977813283602397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,128,1,float16,float16,0,0.33852799733479816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,0,1,float16,float16,0,0.33290133873621625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,128,1,float16,fp8,0,0.3357333342234294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,128,1,fp8,fp8,0,0.34135464827219647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,0,1,float16,fp8,0,0.32953067620595294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,128,1,float16,float16,0,0.3295146624247233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,64,0,1,fp8,fp8,0,0.3349066575368245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,0,1,float16,float16,0,0.3225066661834717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,128,1,float16,fp8,0,0.3266719977060954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,128,1,fp8,fp8,0,0.3375946680704753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,128,1,float16,float16,0,0.18061333894729614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,0,1,float16,fp8,0,0.3205759922663371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,64,0,1,fp8,fp8,0,0.32657599449157715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,0,1,float16,float16,0,0.176639993985494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,128,1,float16,fp8,0,0.178330659866333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,0,1,float16,fp8,0,0.1750719944636027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,0,1,fp8,fp8,0,0.17850667238235474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,64,128,1,fp8,fp8,0,0.18385066588719687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,128,1,float16,float16,0,0.1697333256403605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,0,1,float16,float16,0,0.16511999567349753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,128,1,float16,fp8,0,0.16846400499343872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,128,1,fp8,fp8,0,0.1612160007158915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,0,1,fp8,fp8,0,0.15427199999491373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,64,0,1,float16,fp8,0,0.16481600205103555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,128,1,float16,float16,0,0.17294933398564658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,0,1,float16,float16,0,0.17133333285649618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,128,1,float16,fp8,0,0.1721973419189453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,128,1,fp8,fp8,0,0.16269333163897196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,0,1,float16,fp8,0,0.16684265931447348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,64,0,1,fp8,fp8,0,0.1588640014330546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,128,1,float16,float16,0,0.1779413421948751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,0,1,float16,float16,0,0.17493865887324014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,128,1,float16,fp8,0,0.17708800236384073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,128,1,fp8,fp8,0,0.17383466164271036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,0,1,float16,fp8,0,0.17313599586486816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,64,0,1,fp8,fp8,0,0.168122669061025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,128,1,float16,float16,0,0.17478400468826294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,0,1,float16,float16,0,0.1711039940516154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,128,1,float16,fp8,0,0.1722773313522339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,0,1,float16,fp8,0,0.16918933391571045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,128,1,fp8,fp8,0,0.17435733477274576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,128,1,float16,float16,0,0.10128000378608704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,0,1,float16,float16,0,0.0993173321088155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,64,0,1,fp8,fp8,0,0.16926934321721396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,128,1,float16,fp8,0,0.09889599680900574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,0,1,float16,fp8,0,0.09776000181833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,128,1,fp8,fp8,0,0.10203733046849568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,64,0,1,fp8,fp8,0,0.09899200002352397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,128,1,float16,float16,0,0.09131733576456706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,0,1,float16,float16,0,0.08905067046483357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,128,1,fp8,fp8,0,0.08681066830952962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,128,1,float16,fp8,0,0.09054399530092876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,0,1,float16,fp8,0,0.08870399991671245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,64,0,1,fp8,fp8,0,0.08303466439247131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,128,1,float16,float16,0,0.09270399808883667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,0,1,float16,float16,0,0.09149332841237386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,128,1,float16,fp8,0,0.09268266956011455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,128,1,fp8,fp8,0,0.0900266667207082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,0,1,float16,fp8,0,0.09117866555849712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,64,0,1,fp8,fp8,0,0.08683199683825175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,128,1,float16,float16,0,0.09723200400670369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,0,1,float16,float16,0,0.09511466821034749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,128,1,float16,fp8,0,0.09641599655151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,128,1,fp8,fp8,0,0.0957973301410675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,0,1,float16,fp8,0,0.09381332993507385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,64,0,1,fp8,fp8,0,0.0920960009098053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,128,1,float16,float16,0,0.09528000156084697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,0,1,float16,float16,0,0.09303999940554301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,128,1,float16,fp8,0,0.0934879978497823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,128,1,fp8,fp8,0,0.09542933106422424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,0,1,float16,fp8,0,0.09125333031018575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,128,1,float16,float16,0,0.054842665791511536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,64,0,1,fp8,fp8,0,0.09293867150942485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,0,1,float16,float16,0,0.054010664423306785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,128,1,float16,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,128,1,fp8,fp8,0,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,0,1,float16,fp8,0,0.0544106662273407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,64,0,1,fp8,fp8,0,0.05699733396371206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,128,1,float16,float16,0,0.05403199791908264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,0,1,float16,float16,0,0.05227200190226237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,128,1,float16,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,128,1,fp8,fp8,0,0.05209066470464071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,0,1,float16,fp8,0,0.05206400156021118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,64,0,1,fp8,fp8,0,0.05020266771316528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,128,1,float16,float16,0,0.054229333996772766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,0,1,float16,float16,0,0.052042668064435325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,128,1,float16,fp8,0,0.05389333268006643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,128,1,fp8,fp8,0,0.05249066650867462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,0,1,float16,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,64,0,1,fp8,fp8,0,0.04966933528582255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,128,1,float16,float16,0,0.055104002356529236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,0,1,float16,float16,0,0.0539680023988088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,128,1,float16,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,128,1,fp8,fp8,0,0.05449600021044413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,0,1,float16,fp8,0,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,128,1,float16,float16,0,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,64,0,1,fp8,fp8,0,0.05287999908129374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,128,1,float16,fp8,0,0.05392000079154968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,128,1,fp8,fp8,0,0.053930665055910744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,0,1,float16,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,128,1,float16,float16,0,0.035455999275048576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,64,0,1,fp8,fp8,0,0.05267733335494995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,0,1,float16,float16,0,0.03356266766786575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,128,1,float16,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,128,1,fp8,fp8,0,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,64,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,128,1,float16,float16,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,128,1,fp8,fp8,0,0.03469866762558619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,0,1,float16,fp8,0,0.03504000107447306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,0,1,fp8,fp8,0,0.03358400116364161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,64,128,1,float16,fp8,0,0.03678400069475174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,128,1,float16,float16,0,0.03581333408753077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,0,1,float16,float16,0,0.03533333291610082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,128,1,float16,fp8,0,0.035904000202814736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,128,1,fp8,fp8,0,0.036533333361148834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,64,0,1,fp8,fp8,0,0.033344000577926636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,128,1,float16,float16,0,0.036015999813874565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,0,1,float16,float16,0,0.03428266694148382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,128,1,float16,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,128,1,fp8,fp8,0,0.03659733384847641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,64,0,1,fp8,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,128,1,float16,float16,0,0.035546667873859406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,128,1,float16,fp8,0,0.03586133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,0,1,float16,float16,0,0.034341332813103996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,128,1,fp8,fp8,0,0.035349334279696144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,0,1,float16,fp8,0,0.03367999941110611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,64,0,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,128,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,128,1,float16,float16,0,0.02425066630045573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,128,1,fp8,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,128,1,float16,float16,0,0.02386133372783661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,128,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,128,1,fp8,fp8,0,0.024330665667851765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,0,1,float16,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,64,0,1,fp8,fp8,0,0.022181332111358643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,128,1,float16,float16,0,0.02455466737349828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,128,1,float16,fp8,0,0.024682665864626568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,0,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,128,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,64,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,128,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,128,1,float16,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,0,1,float16,float16,0,0.024400000770886738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,128,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,64,0,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,128,1,float16,float16,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,128,1,float16,fp8,0,0.02430933217207591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,0,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,64,0,1,fp8,fp8,0,0.02401600033044815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,0,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,128,1,fp8,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,0,1,float16,fp8,0,0.0200853335360686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,128,1,float16,float16,0,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,128,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,0,1,float16,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,64,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,128,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,128,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,0,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,128,1,float16,float16,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,128,1,float16,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,128,1,fp8,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,128,1,float16,float16,0,0.018005333840847015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,128,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,128,1,fp8,fp8,0,0.017914666483799618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,128,1,fp8,fp8,0,0.018309333672126133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,64,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,128,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,128,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,64,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,128,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,64,0,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,128,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,0,1,float16,float16,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,128,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,128,1,float16,float16,0,0.017690667261679966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,64,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,128,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,128,1,float16,float16,0,0.017909333109855652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,128,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,0,1,float16,float16,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,128,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,128,1,float16,float16,0,0.018170667191346485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,128,1,float16,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,64,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,128,1,float16,float16,0,0.17776532967885336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,0,1,float16,float16,0,0.17735999822616577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,128,1,float16,fp8,0,0.17536000410715738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,128,1,fp8,fp8,0,0.17106133699417114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,0,1,float16,fp8,0,0.17498666048049927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,64,0,1,fp8,fp8,0,0.17218667268753052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,128,1,float16,float16,0,0.17926400899887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,0,1,float16,float16,0,0.17912532885869345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,128,1,float16,fp8,0,0.1770026683807373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,128,1,fp8,fp8,0,0.16581867138544717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,0,1,float16,fp8,0,0.17656532923380533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,64,0,1,fp8,fp8,0,0.1644319991270701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,128,1,float16,float16,0,0.18184532721837363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,0,1,float16,float16,0,0.1832853356997172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,128,1,float16,fp8,0,0.17854400475819907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,128,1,fp8,fp8,0,0.17708265781402588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,0,1,float16,fp8,0,0.17992534240086874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,64,0,1,fp8,fp8,0,0.17787200212478638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,128,1,float16,float16,0,0.18163732687632242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,0,1,float16,float16,0,0.18131200472513834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,128,1,float16,fp8,0,0.1813919941584269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,128,1,fp8,fp8,0,0.1789919932683309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,0,1,float16,fp8,0,0.18071999152501425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,128,1,float16,float16,0,0.10329066713651021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,0,1,float16,float16,0,0.10348266363143921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,64,0,1,fp8,fp8,0,0.17901867628097534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,128,1,float16,fp8,0,0.1016480028629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,128,1,fp8,fp8,0,0.1039466659228007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,0,1,float16,fp8,0,0.10153599580128987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,64,0,1,fp8,fp8,0,0.10295466581980388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,128,1,float16,float16,0,0.09549867113431294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,0,1,float16,float16,0,0.09492266178131104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,128,1,float16,fp8,0,0.09351467092831929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,128,1,fp8,fp8,0,0.09130666653315227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,0,1,float16,fp8,0,0.09339200456937154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,64,0,1,fp8,fp8,0,0.09090133508046468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,128,1,float16,float16,0,0.09558932979901631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,0,1,float16,float16,0,0.09525866309801738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,128,1,fp8,fp8,0,0.09098133444786072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,128,1,float16,fp8,0,0.09569600224494934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,0,1,float16,fp8,0,0.09540800253550212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,64,0,1,fp8,fp8,0,0.09139200051625569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,128,1,float16,float16,0,0.0990773340066274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,0,1,float16,float16,0,0.09803199768066406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,128,1,float16,fp8,0,0.09710400303204854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,128,1,fp8,fp8,0,0.09752532839775085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,0,1,float16,fp8,0,0.0978559950987498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,64,0,1,fp8,fp8,0,0.09915733337402344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,0,1,float16,float16,0,0.09937600294748943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,128,1,float16,float16,0,0.09922132889429729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,128,1,float16,fp8,0,0.09912533561388652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,128,1,fp8,fp8,0,0.09935466448465984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,0,1,float16,fp8,0,0.0992693305015564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,64,0,1,fp8,fp8,0,0.09982400139172871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,128,1,float16,float16,0,0.05600533386071523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,0,1,float16,float16,0,0.055888002117474876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,128,1,float16,fp8,0,0.05417599777380625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,128,1,fp8,fp8,0,0.05840000013510386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,64,0,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,0,1,float16,float16,0,0.053904001911481224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,128,1,float16,fp8,0,0.0521066685517629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,128,1,fp8,fp8,0,0.051781331499417625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,0,1,float16,fp8,0,0.052522664268811546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,64,0,1,fp8,fp8,0,0.05208533505598704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,128,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,0,1,float16,float16,0,0.05418133238951365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,128,1,float16,fp8,0,0.052383999029795326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,128,1,fp8,fp8,0,0.05101333558559418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,0,1,float16,fp8,0,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,64,0,1,fp8,fp8,0,0.052373334765434265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,128,1,float16,float16,0,0.056090667843818665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,0,1,float16,float16,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,128,1,float16,fp8,0,0.05429333448410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,128,1,fp8,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,0,1,float16,fp8,0,0.0543093333641688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,64,0,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,128,1,float16,float16,0,0.053871999184290566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,0,1,float16,float16,0,0.05406933526198069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,128,1,float16,fp8,0,0.05385600030422211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,128,1,fp8,fp8,0,0.05449600021044413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,0,1,float16,fp8,0,0.054133335749308266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,64,0,1,fp8,fp8,0,0.053727999329566956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,128,1,float16,float16,0,0.035904000202814736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,128,1,float16,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,0,1,float16,float16,0,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,128,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,0,1,float16,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,64,0,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,128,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,128,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,128,1,fp8,fp8,0,0.03537066777547201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,0,1,float16,fp8,0,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,64,0,1,fp8,fp8,0,0.03379199902216593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,128,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,0,1,float16,float16,0,0.03595199932654699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,128,1,float16,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,128,1,fp8,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,0,1,float16,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,128,1,float16,float16,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,64,0,1,fp8,fp8,0,0.035962666074434914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,0,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,128,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,128,1,fp8,fp8,0,0.03602133442958196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,0,1,float16,fp8,0,0.035946667194366455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,64,0,1,fp8,fp8,0,0.035930665830771126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,128,1,float16,float16,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,0,1,float16,float16,0,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,128,1,float16,fp8,0,0.03646933287382126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,128,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,0,1,float16,fp8,0,0.03589333345492681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,0,1,float16,float16,0,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,128,1,float16,fp8,0,0.02442666639884313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,128,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,0,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,64,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,128,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,128,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,128,1,fp8,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,128,1,float16,float16,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,128,1,float16,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,128,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,64,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,128,1,float16,float16,0,0.02383466561635335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,128,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,64,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,128,1,float16,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,64,0,1,fp8,fp8,0,0.024517332514127094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,128,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,128,1,float16,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,128,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,128,1,float16,float16,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,128,1,float16,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,128,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,128,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,128,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,0,1,float16,float16,0,0.01828266680240631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,64,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,128,1,fp8,fp8,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,128,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,0,1,float16,float16,0,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,128,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,128,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,128,1,float16,float16,0,0.01666133354107539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,0,1,float16,float16,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,64,0,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,128,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,128,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,128,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,64,0,1,fp8,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,64,0,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,128,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,128,1,fp8,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,64,0,1,fp8,fp8,0,0.01562133307258288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,128,1,float16,float16,0,0.016303999970356624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,128,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,0,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,128,1,float16,float16,0,0.01621866722901662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,64,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,128,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,128,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,128,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,128,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,128,1,fp8,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,0,1,float16,float16,0,0.01659199967980385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,64,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,128,1,float16,float16,0,0.12326932946840923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,0,1,float16,float16,0,0.12353066603342693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,128,1,float16,fp8,0,0.1239359974861145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,0,1,float16,fp8,0,0.12190933028856914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,128,1,fp8,fp8,0,0.11555199821790059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,64,0,1,fp8,fp8,0,0.11737066507339478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,128,1,float16,float16,0,0.1237386663754781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,0,1,float16,float16,0,0.12451199690500896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,128,1,float16,fp8,0,0.12418666481971741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,128,1,fp8,fp8,0,0.1155413289864858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,0,1,fp8,fp8,0,0.11726933717727661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,128,1,float16,float16,0,0.1263200044631958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,64,0,1,float16,fp8,0,0.12286399801572163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,0,1,float16,float16,0,0.12779200077056885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,128,1,float16,fp8,0,0.12568533420562744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,128,1,fp8,fp8,0,0.12386666735013326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,0,1,float16,fp8,0,0.12615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,64,0,1,fp8,fp8,0,0.12341333429018657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,128,1,float16,float16,0,0.12684266765912375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,0,1,float16,float16,0,0.12820266683896384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,128,1,fp8,fp8,0,0.12366933623949687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,128,1,float16,fp8,0,0.12758933504422507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,0,1,float16,fp8,0,0.12677866220474243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,64,0,1,fp8,fp8,0,0.12408000230789185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,128,1,float16,float16,0,0.07076799869537354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,0,1,float16,float16,0,0.07039466500282288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,128,1,float16,fp8,0,0.07049599786599477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,128,1,fp8,fp8,0,0.07046400010585785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,0,1,float16,fp8,0,0.0703359991312027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,64,0,1,fp8,fp8,0,0.07085866729418437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,128,1,float16,float16,0,0.06840000053246816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,0,1,float16,float16,0,0.06868266562620799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,128,1,float16,fp8,0,0.06776533524195354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,128,1,fp8,fp8,0,0.06454933186372121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,0,1,float16,fp8,0,0.06877866884072621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,64,0,1,fp8,fp8,0,0.06410666803518932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,128,1,float16,float16,0,0.06877333422501881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,0,1,float16,float16,0,0.06841066479682922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,128,1,float16,fp8,0,0.06806399921576183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,128,1,fp8,fp8,0,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,0,1,float16,fp8,0,0.06763733426729839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,128,1,float16,float16,0,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,64,0,1,fp8,fp8,0,0.06453866759936015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,0,1,float16,float16,0,0.06846933563550313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,128,1,float16,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,128,1,fp8,fp8,0,0.06829333305358887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,0,1,float16,fp8,0,0.0682773341735204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,64,0,1,fp8,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,128,1,float16,float16,0,0.06875200072924297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,128,1,float16,fp8,0,0.06875733534495036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,0,1,float16,float16,0,0.06829866766929626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,128,1,fp8,fp8,0,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,0,1,float16,fp8,0,0.06876799960931142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,128,1,float16,float16,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,64,0,1,fp8,fp8,0,0.06857066849867503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,128,1,float16,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,0,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,128,1,fp8,fp8,0,0.0432586669921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,0,1,float16,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,64,0,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,128,1,float16,float16,0,0.04168533285458883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,0,1,float16,float16,0,0.0422026664018631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,128,1,float16,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,128,1,fp8,fp8,0,0.04001600046952566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,0,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,64,0,1,fp8,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,128,1,float16,float16,0,0.041706666350364685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,0,1,float16,float16,0,0.042175998290379844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,128,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,128,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,0,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,64,0,1,fp8,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,128,1,float16,float16,0,0.04224533339341482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,128,1,float16,fp8,0,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,128,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,0,1,float16,fp8,0,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,64,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,128,1,float16,float16,0,0.04170133173465729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,0,1,float16,float16,0,0.042447999119758606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,128,1,float16,fp8,0,0.042634665966033936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,128,1,fp8,fp8,0,0.041850666205088295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,0,1,float16,fp8,0,0.04267199834187826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,64,0,1,fp8,fp8,0,0.0422986646493276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,128,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,0,1,float16,float16,0,0.0276853342851003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,128,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,128,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,64,0,1,fp8,fp8,0,0.02787200113137563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,128,1,float16,float16,0,0.027642667293548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,128,1,float16,fp8,0,0.0290133332212766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,0,1,float16,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,64,128,1,fp8,fp8,0,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,128,1,float16,float16,0,0.028704000016053517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,128,1,float16,fp8,0,0.02923733244339625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,0,1,float16,float16,0,0.02773866554101308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,0,1,float16,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,128,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,64,0,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,0,1,float16,float16,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,128,1,float16,fp8,0,0.028005334238211315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,128,1,fp8,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,64,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,0,1,float16,float16,0,0.029120000700155895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,128,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,128,1,fp8,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,0,1,float16,fp8,0,0.027776000400384266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,128,1,float16,float16,0,0.019882666567961376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,64,0,1,fp8,fp8,0,0.028773332635561626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,128,1,float16,fp8,0,0.020773333807786305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,128,1,fp8,fp8,0,0.020245333512624104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,64,0,1,float16,fp8,0,0.019850666324297588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,128,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,64,0,1,fp8,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,128,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,128,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,128,1,float16,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,0,1,float16,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,64,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,128,1,float16,float16,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,128,1,float16,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,128,1,fp8,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,64,0,1,fp8,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,128,1,float16,float16,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,128,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,128,1,float16,float16,0,0.015856000284353893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,128,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,128,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,128,1,float16,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,128,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,128,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,128,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,64,0,1,fp8,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,0,1,float16,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,0,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,64,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,64,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,128,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,128,1,float16,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,128,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,128,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,64,0,1,float16,fp8,0,0.016656000167131424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,128,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,64,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,128,1,float16,float16,0,0.015967999895413715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,64,0,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,128,1,float16,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,0,1,float16,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,128,1,float16,float16,0,0.014682666709025701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,0,1,float16,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,0,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,128,1,float16,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,0,1,float16,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,0,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,64,0,1,float16,fp8,0,0.016336000214020412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,128,1,float16,float16,0,0.09700266520182292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,0,1,float16,float16,0,0.09523733456929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,128,1,float16,fp8,0,0.09566400448481242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,128,1,fp8,fp8,0,0.09102933605511983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,0,1,float16,fp8,0,0.09511466821034749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,64,0,1,fp8,fp8,0,0.09128533800443013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,128,1,float16,float16,0,0.0960693359375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,0,1,float16,float16,0,0.09714133540789287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,128,1,float16,fp8,0,0.09756799538930257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,128,1,fp8,fp8,0,0.09123733639717102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,0,1,float16,fp8,0,0.09671466549237569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,128,1,float16,float16,0,0.09714133540789287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,64,0,1,fp8,fp8,0,0.09124799569447835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,0,1,float16,float16,0,0.09722666939099629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,128,1,float16,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,128,1,fp8,fp8,0,0.09327466289202373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,0,1,float16,fp8,0,0.09578133622805278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,64,0,1,fp8,fp8,0,0.09436266620953877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,128,1,float16,float16,0,0.0972106655438741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,0,1,float16,float16,0,0.09559999903043111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,128,1,float16,fp8,0,0.09663466612497966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,128,1,fp8,fp8,0,0.0939359962940216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,0,1,float16,fp8,0,0.0956053336461385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,64,0,1,fp8,fp8,0,0.09337600072224934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,128,1,float16,float16,0,0.056746666630109154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,0,1,float16,float16,0,0.0558186670144399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,128,1,float16,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,128,1,fp8,fp8,0,0.056549335519472756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,0,1,float16,fp8,0,0.056474665800730385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,128,1,float16,float16,0,0.05602133274078369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,64,0,1,fp8,fp8,0,0.05683733522891998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,0,1,float16,float16,0,0.05671466886997223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,128,1,float16,fp8,0,0.05611200133959452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,128,1,fp8,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,0,1,fp8,fp8,0,0.0543093333641688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,128,1,float16,float16,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,64,0,1,float16,fp8,0,0.0565280020236969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,0,1,float16,float16,0,0.05689600110054016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,128,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,128,1,fp8,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,0,1,fp8,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,64,0,1,float16,fp8,0,0.057061334451039634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,128,1,float16,float16,0,0.05630933245023092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,0,1,float16,float16,0,0.05640000104904175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,128,1,float16,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,128,1,fp8,fp8,0,0.056757330894470215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,0,1,float16,fp8,0,0.056287998954455055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,64,0,1,fp8,fp8,0,0.05435200035572052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,128,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,128,1,float16,fp8,0,0.05845333139101664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,128,1,fp8,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,0,1,float16,float16,0,0.05783466498057047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,0,1,float16,fp8,0,0.05635199944178263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,128,1,float16,float16,0,0.03527999917666117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,64,0,1,fp8,fp8,0,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,0,1,float16,float16,0,0.03573866685231527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,128,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,128,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,0,1,float16,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,128,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,0,1,float16,float16,0,0.03389866650104523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,128,1,float16,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,128,1,fp8,fp8,0,0.034058667719364166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,0,1,float16,fp8,0,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,64,0,1,fp8,fp8,0,0.033370666205883026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,128,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,0,1,float16,float16,0,0.03364266703526179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,128,1,float16,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,128,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,0,1,float16,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,64,0,1,fp8,fp8,0,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,128,1,float16,float16,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,0,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,128,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,0,1,float16,fp8,0,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,128,1,fp8,fp8,0,0.03499199946721395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,64,0,1,fp8,fp8,0,0.03508266558249792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,128,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,128,1,float16,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,128,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,0,1,float16,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,64,0,1,fp8,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,128,1,float16,float16,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,0,1,float16,float16,0,0.023749334116776783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,128,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,0,1,float16,fp8,0,0.023845332364241283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,128,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,64,0,1,fp8,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,128,1,float16,fp8,0,0.023775999744733173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,128,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,0,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,64,0,1,fp8,fp8,0,0.024277334411938984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,128,1,float16,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,0,1,float16,float16,0,0.023919999599456787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,128,1,fp8,fp8,0,0.023765332996845245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,128,1,float16,float16,0,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,0,1,float16,float16,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,128,1,float16,fp8,0,0.025018667181332905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,64,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,128,1,float16,float16,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,0,1,float16,float16,0,0.025050667424996693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,128,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,128,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,0,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,64,0,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,128,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,64,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,128,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,0,1,float16,float16,0,0.01865600049495697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,128,1,float16,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,128,1,fp8,fp8,0,0.018021332720915478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,64,0,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,128,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,128,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,0,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,64,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,128,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,0,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,128,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,128,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,64,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,128,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,128,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,0,1,float16,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,128,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,0,1,float16,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,128,1,float16,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,0,1,float16,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,64,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,128,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,0,1,float16,float16,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,0,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,0,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,0,1,float16,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,64,0,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,0,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,128,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,0,1,float16,float16,0,0.08679466446240743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,128,1,float16,fp8,0,0.08718400200208028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,128,1,float16,float16,0,0.08669867118199666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,128,1,fp8,fp8,0,0.08066133161385854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,0,1,float16,fp8,0,0.08702400326728821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,64,0,1,fp8,fp8,0,0.08130666613578796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,128,1,float16,float16,0,0.08691733082135518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,0,1,float16,float16,0,0.08682133754094441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,128,1,float16,fp8,0,0.08716799815495808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,128,1,fp8,fp8,0,0.08116800089677174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,0,1,float16,fp8,0,0.08706133564313252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,64,0,1,fp8,fp8,0,0.08092266817887624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,128,1,float16,float16,0,0.0867199997107188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,0,1,float16,float16,0,0.08715200424194336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,128,1,float16,fp8,0,0.0869813362757365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,128,1,fp8,fp8,0,0.08272000153859456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,0,1,float16,fp8,0,0.08738666772842407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,64,0,1,fp8,fp8,0,0.08259200056393941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,128,1,float16,float16,0,0.08715732892354329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,0,1,float16,float16,0,0.08711999654769897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,128,1,float16,fp8,0,0.08706133564313252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,128,1,fp8,fp8,0,0.08308266599973042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,0,1,float16,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,128,1,float16,float16,0,0.0498879998922348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,0,1,float16,float16,0,0.05035733183224996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,64,0,1,fp8,fp8,0,0.08268266419569652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,128,1,float16,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,128,1,fp8,fp8,0,0.04832000037034353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,0,1,float16,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,64,0,1,fp8,fp8,0,0.048351998130480446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,128,1,float16,float16,0,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,0,1,float16,float16,0,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,128,1,float16,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,128,1,fp8,fp8,0,0.04743466774622599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,0,1,float16,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,128,1,float16,float16,0,0.05031466484069824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,0,1,float16,float16,0,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,128,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,128,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,0,1,float16,fp8,0,0.04995200037956238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,64,0,1,fp8,fp8,0,0.046351999044418335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,0,1,float16,float16,0,0.05014933149019877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,128,1,float16,float16,0,0.05027733246485392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,128,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,128,1,float16,fp8,0,0.05027199784914652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,0,1,float16,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,128,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,64,0,1,fp8,fp8,0,0.048623998959859215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,0,1,float16,float16,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,128,1,float16,fp8,0,0.05156266689300537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,128,1,float16,float16,0,0.03206400076548258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,128,1,fp8,fp8,0,0.04795200129350027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,0,1,fp8,fp8,0,0.04786666731039683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,64,0,1,float16,fp8,0,0.05057600140571594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,0,1,float16,float16,0,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,128,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,128,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,0,1,float16,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,128,1,float16,float16,0,0.03182400017976761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,64,0,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,128,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,128,1,fp8,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,0,1,float16,fp8,0,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,64,0,1,fp8,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,0,1,float16,float16,0,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,128,1,float16,float16,0,0.02992533395687739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,128,1,float16,fp8,0,0.031210665901501972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,128,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,0,1,float16,fp8,0,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,128,1,float16,float16,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,64,0,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,0,1,float16,float16,0,0.031504000226656594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,128,1,float16,fp8,0,0.03123733401298523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,128,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,0,1,float16,fp8,0,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,64,0,1,fp8,fp8,0,0.029839999973773956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,128,1,float16,float16,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,0,1,float16,float16,0,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,128,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,128,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,0,1,float16,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,64,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,128,1,float16,float16,0,0.022698665658632915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,128,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,64,0,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,0,1,float16,float16,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,128,1,float16,fp8,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,128,1,fp8,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,64,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,128,1,float16,float16,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,128,1,float16,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,128,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,64,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,128,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,128,1,float16,fp8,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,128,1,fp8,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,0,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,128,1,float16,float16,0,0.022122666239738464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,64,0,1,fp8,fp8,0,0.022005334496498108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,128,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,128,1,float16,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,0,1,float16,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,64,0,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,128,1,float16,fp8,0,0.019930666933457058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,128,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,64,0,1,fp8,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,128,1,float16,float16,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,128,1,float16,fp8,0,0.018895999838908512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,128,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,128,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,128,1,fp8,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,0,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,128,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,0,1,float16,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,64,0,1,fp8,fp8,0,0.01846933364868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,128,1,float16,float16,0,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,0,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,128,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,128,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,128,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,64,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,0,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,128,1,float16,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,128,1,float16,float16,0,0.016250666230916977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,128,1,fp8,fp8,0,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,128,1,float16,float16,0,0.01632000009218852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,0,1,float16,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,128,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,128,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,128,1,fp8,fp8,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,128,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,128,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,128,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,128,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,128,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,0,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,0,1,float16,float16,0,0.014602666099866232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,64,128,1,fp8,fp8,0,0.015770666301250458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,128,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,128,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,64,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,128,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,128,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,128,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,0,1,float16,fp8,0,0.01626666635274887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,64,0,1,fp8,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,128,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,128,1,fp8,fp8,0,0.014757333944241205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,128,1,float16,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,0,1,float16,float16,0,0.014602666099866232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,128,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,0,0.07459733386834462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,0,0.07503999769687653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,0,0.07640000184377034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,128,1,fp8,fp8,0,0.06871466835339864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,0,0.07471466561158498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,64,0,1,fp8,fp8,0,0.06885333359241486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,0,0.07477333148320515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,0,0.07449600100517273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,0,0.0764213353395462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,128,1,fp8,fp8,0,0.068271999557813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,0,0.07582933207352956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,64,0,1,fp8,fp8,0,0.07062933345635732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,0,0.07454933226108551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,0,0.07464000085989635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,0,0.07481599847475688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,128,1,fp8,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,0,0.07429333527882893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,64,0,1,fp8,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,0,0.0745600014925003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,0,0.0745119998852412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,0,0.075013334552447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,128,1,fp8,fp8,0,0.0687253326177597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,128,1,float16,float16,0,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,0,0.04381866753101349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,0,0.07489066819349925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,64,0,1,fp8,fp8,0,0.06843733290831248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,128,1,float16,fp8,0,0.045791998505592346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,128,1,fp8,fp8,0,0.042122667034467064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,0,0.045706664522488914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,64,0,1,fp8,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,0,0.045653333266576133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,0,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,0,0.043978666265805565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,128,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,64,0,1,fp8,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,0,0.04549333453178406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,0,0.04413333535194397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,0,0.044351999958356224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,64,0,1,fp8,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,0,0.04552533229192098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,0,0.04408533374468485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,128,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,0,0.04410133262475332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,64,0,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,0,0.043765331308046974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,0,0.045647998650868736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,128,1,fp8,fp8,0,0.041493333876132965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,64,0,1,fp8,fp8,0,0.0420959989229838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,128,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,128,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,64,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,128,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,0,1,fp8,fp8,0,0.02880000074704488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,128,1,fp8,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,64,0,1,fp8,fp8,0,0.028090665737787884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,0,0.029861333469549816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,128,1,fp8,fp8,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,0,0.029866665601730347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,64,0,1,fp8,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,128,1,fp8,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,64,0,1,fp8,fp8,0,0.027973333994547527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,128,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,128,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,128,1,fp8,fp8,0,0.021829334398110706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,64,0,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,0,0.02239999920129776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,0,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,64,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,64,0,1,fp8,fp8,0,0.021664001047611237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,0,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,128,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,64,0,1,fp8,fp8,0,0.02186666677395503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,0,0.02367999901374181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,128,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,128,1,float16,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,128,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,0,0.01969066634774208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,64,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,0,0.019839999576409657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,128,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,0,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,0,0.018432000031073887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,128,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,0,0.017690667261679966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,64,0,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,0,0.017903999735911686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,128,1,fp8,fp8,0,0.018218666315078735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,128,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,64,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,128,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,0,0.016202667107184727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,128,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,128,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,64,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,128,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,128,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,64,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,64,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,0,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,128,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,0,0.015941333025693893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,64,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,0,0.015925332903862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,64,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,128,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,64,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,128,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,0,0.014453332871198654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,64,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,float16,0,0.6335200071334839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,128,1,float16,fp8,0,0.6398400068283081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,128,1,fp8,fp8,0,0.5846453507741293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,float16,0,3.2729174296061196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,float16,0,0.6442986726760864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,0,1,float16,fp8,0,3.284330685933431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,64,0,1,fp8,fp8,0,2.824864069620768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,128,1,float16,fp8,0,0.6505333185195923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,128,1,fp8,fp8,0,0.5988106727600098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,float16,0,0.6614186763763428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,float16,0,3.2913331985473633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,0,1,float16,fp8,0,3.297226587931315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,64,0,1,fp8,fp8,0,2.845066706339518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,128,1,float16,fp8,0,0.667242685953776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,128,1,fp8,fp8,0,0.6170666615168253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,float16,0,0.37934935092926025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,float16,0,3.305727958679199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,128,1,float16,fp8,0,0.3896640141805013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,0,1,fp8,fp8,0,2.863194783528646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,64,0,1,float16,fp8,0,3.313103993733724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,128,1,fp8,fp8,0,0.365392009417216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,float16,0,1.750165303548177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,float16,0,0.34091734886169434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,128,1,float16,fp8,0,0.34327999750773114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,0,1,fp8,fp8,0,1.5247947374979656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,64,0,1,float16,fp8,0,1.7623359362284343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,float16,0,1.7012640635172527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,128,1,fp8,fp8,0,0.3163093328475952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,float16,0,0.3426080147425334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,128,1,float16,fp8,0,0.34701867898305255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,0,1,fp8,fp8,0,1.47542937596639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,64,0,1,float16,fp8,0,1.7007733980814617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,128,1,fp8,fp8,0,0.3224800030390422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,float16,0,1.7094240188598633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,float16,0,0.35146665573120117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,0,1,fp8,fp8,0,1.477392037709554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,64,0,1,float16,fp8,0,1.7067947387695312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,128,1,float16,fp8,0,0.35675732294718426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,float16,0,1.7147253354390461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,128,1,fp8,fp8,0,0.33086933692296344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,float16,0,0.21686933437983194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,0,1,fp8,fp8,0,1.4874560038248699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,128,1,float16,fp8,0,0.2230559984842936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,float16,0,0.9430027008056641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,64,0,1,float16,fp8,0,1.7208693822224934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,128,1,fp8,fp8,0,0.2098346749941508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,float16,0,0.1933120091756185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,128,1,float16,fp8,0,0.19542400042215982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,0,1,float16,fp8,0,0.9495999813079834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,64,0,1,fp8,fp8,0,0.8278986612955729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,float16,0,0.9112106959025065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,128,1,fp8,fp8,0,0.1853760083516439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,float16,0,0.19559999306996664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,0,1,fp8,fp8,0,0.7983787059783936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,64,0,1,float16,fp8,0,0.9163306554158529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,128,1,float16,fp8,0,0.19763733943303427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,128,1,fp8,fp8,0,0.18754667043685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,float16,0,0.9166506926218668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,float16,0,0.20228799184163412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,0,1,float16,fp8,0,0.9184853235880533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,64,0,1,fp8,fp8,0,0.8010719617207845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,128,1,float16,fp8,0,0.20583999156951904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,float16,0,0.9210186799367269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,128,1,fp8,fp8,0,0.19396267334620157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,float16,0,0.15666133165359497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,0,1,float16,fp8,0,0.92413330078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,128,1,float16,fp8,0,0.15680000185966492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,64,0,1,fp8,fp8,0,0.8080373605092367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,float16,0,0.559551994005839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,128,1,fp8,fp8,0,0.14754666884740195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,0,1,fp8,fp8,0,0.493066668510437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,64,0,1,float16,fp8,0,0.559930682182312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,float16,0,0.15267733732859293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,128,1,float16,fp8,0,0.15451199809710184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,128,1,fp8,fp8,0,0.14432000120480856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,float16,0,0.5535093148549398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,0,1,fp8,fp8,0,0.48252801100413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,64,0,1,float16,fp8,0,0.5528746843338013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,float16,0,0.15260799725850424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,128,1,float16,fp8,0,0.15306133031845093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,128,1,fp8,fp8,0,0.14447999993960062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,float16,0,0.5517280101776123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,float16,0,0.15434666474660239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,0,1,fp8,fp8,0,0.48334399859110516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,64,0,1,float16,fp8,0,0.5518026749293009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,128,1,float16,fp8,0,0.15503999590873718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,128,1,fp8,fp8,0,0.14662933349609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,float16,0,0.5520266691843668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,0,1,fp8,fp8,0,0.48633066813151044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,64,0,1,float16,fp8,0,0.5554613272349039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,float16,0,0.4758559862772624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,128,1,float16,fp8,0,0.48135467370351154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,128,1,fp8,fp8,0,0.43926934401194256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,float16,0,1.962762673695882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,0,1,fp8,fp8,0,1.7043253580729167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,float16,0,0.4835093418757121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,64,0,1,float16,fp8,0,1.970629374186198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,128,1,float16,fp8,0,0.4885866641998291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,128,1,fp8,fp8,0,0.4495573441187541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,float16,0,0.494981328646342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,float16,0,1.971669356028239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,0,1,float16,fp8,0,1.978384017944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,128,1,float16,fp8,0,0.5013386805852255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,64,0,1,fp8,fp8,0,1.7128960291544597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,128,1,fp8,fp8,0,0.4639253218968709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,float16,0,0.2898719906806946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,float16,0,1.9850400288899739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,128,1,float16,fp8,0,0.29732267061869305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,0,1,fp8,fp8,0,1.7286399205525715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,64,0,1,float16,fp8,0,1.9940160115559895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,128,1,fp8,fp8,0,0.27932266394297284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,float16,0,1.072709321975708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,float16,0,0.25785066684087116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,0,1,float16,fp8,0,1.078506628672282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,64,0,1,fp8,fp8,0,0.9380693435668945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,128,1,float16,fp8,0,0.2593013246854146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,float16,0,1.029263973236084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,128,1,fp8,fp8,0,0.24226667483647665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,float16,0,0.2602506677309672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,0,1,float16,fp8,0,1.0326666831970215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,64,0,1,fp8,fp8,0,0.9016213417053223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,128,1,float16,fp8,0,0.2632266680399577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,float16,0,1.030448039372762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,128,1,fp8,fp8,0,0.24665067593256632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,float16,0,0.26729599634806317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,0,1,float16,fp8,0,1.0367680390675862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,64,0,1,fp8,fp8,0,0.9073279698689779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,128,1,float16,fp8,0,0.2712266643842061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,float16,0,1.0406506856282551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,128,1,fp8,fp8,0,0.2531893253326416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,float16,0,0.16453867157300314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,0,1,float16,fp8,0,1.0463840166727703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,float16,0,0.5890133380889893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,128,1,float16,fp8,0,0.16970133781433105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,64,0,1,fp8,fp8,0,0.9108479817708334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,128,1,fp8,fp8,0,0.16288000345230103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,float16,0,0.14632532993952432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,0,1,fp8,fp8,0,0.5187306801478068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,64,0,1,float16,fp8,0,0.5937226613362631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,128,1,float16,fp8,0,0.14654399951299033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,float16,0,0.5641333262125651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,128,1,fp8,fp8,0,0.1397599975268046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,0,1,fp8,fp8,0,0.4925599892934163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,64,0,1,float16,fp8,0,0.5642933448155721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,float16,0,0.14873600006103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,float16,0,0.5645440022150675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,128,1,float16,fp8,0,0.14871467153231302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,128,1,fp8,fp8,0,0.1423679987589518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,float16,0,0.15254933635393778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,0,1,float16,fp8,0,0.5667413473129272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,64,0,1,fp8,fp8,0,0.4957226514816284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,128,1,float16,fp8,0,0.15477866927782694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,float16,0,0.5705013275146484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,128,1,fp8,fp8,0,0.14738667011260986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,float16,0,0.11774933338165283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,float16,0,0.3595999876658122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,0,1,float16,fp8,0,0.5721333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,64,0,1,fp8,fp8,0,0.5041973193486532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,128,1,float16,fp8,0,0.11778133114178975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,128,1,fp8,fp8,0,0.11332799990971883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,float16,0,0.11763733625411987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,0,1,float16,fp8,0,0.35899198055267334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,64,0,1,fp8,fp8,0,0.31886933247248334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,128,1,float16,fp8,0,0.11588266491889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,128,1,fp8,fp8,0,0.1095413366953532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,float16,0,0.3569173415501912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,0,1,float16,fp8,0,0.35552533467610675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,float16,0,0.11551466584205627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,64,0,1,fp8,fp8,0,0.31390400727589923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,128,1,float16,fp8,0,0.11552533507347107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,float16,0,0.3553333282470703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,128,1,fp8,fp8,0,0.10939733187357585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,float16,0,0.11551466584205627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,0,1,fp8,fp8,0,0.313098669052124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,64,0,1,float16,fp8,0,0.3564106623331706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,128,1,float16,fp8,0,0.11591466267903645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,128,1,fp8,fp8,0,0.11155200004577637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,float16,0,0.3587413231531779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,0,1,fp8,fp8,0,0.3136213421821594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,64,0,1,float16,fp8,0,0.356330672899882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,float16,0,0.40086400508880615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,128,1,float16,fp8,0,0.4041546583175659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,128,1,fp8,fp8,0,0.3694346745808919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,float16,0,1.4318453470865886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,float16,0,0.4057013193766276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,0,1,float16,fp8,0,1.4385813077290852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,64,0,1,fp8,fp8,0,1.2466879685719807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,128,1,float16,fp8,0,0.4105653365453084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,128,1,fp8,fp8,0,0.37701865037282306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,float16,0,1.441109339396159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,0,1,fp8,fp8,0,1.253328005472819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,64,0,1,float16,fp8,0,1.4414079984029133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,float16,0,0.4148906469345093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,128,1,fp8,fp8,0,0.38834134737650555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,128,1,float16,fp8,0,0.4201493263244629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,float16,0,0.24296534061431885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,float16,0,1.450640042622884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,0,1,float16,fp8,0,1.4597439765930176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,128,1,float16,fp8,0,0.2490826646486918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,64,0,1,fp8,fp8,0,1.2637279828389485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,float16,0,0.7923839886983236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,128,1,fp8,fp8,0,0.23431466023127237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,float16,0,0.21358933051427206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,0,1,fp8,fp8,0,0.6975146929423014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,64,0,1,float16,fp8,0,0.7974507013956705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,128,1,float16,fp8,0,0.21590399742126465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,128,1,fp8,fp8,0,0.2018293341000875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,float16,0,0.7573066552480062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,0,1,fp8,fp8,0,0.6627093156178793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,float16,0,0.2163626750310262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,64,0,1,float16,fp8,0,0.7583733399709066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,128,1,float16,fp8,0,0.21920533974965414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,128,1,fp8,fp8,0,0.20639999707539877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,float16,0,0.7599199612935384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,float16,0,0.224671999613444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,0,1,float16,fp8,0,0.7636693318684896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,64,0,1,fp8,fp8,0,0.6671466827392578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,128,1,float16,fp8,0,0.22802132368087769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,float16,0,0.7692800362904867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,128,1,fp8,fp8,0,0.21341866254806519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,float16,0,0.1430400013923645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,float16,0,0.44087998072306317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,0,1,float16,fp8,0,0.7712106704711914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,128,1,float16,fp8,0,0.14620799819628397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,64,0,1,fp8,fp8,0,0.6747840245564779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,128,1,fp8,fp8,0,0.1406880021095276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,float16,0,0.12471999724706014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,0,1,fp8,fp8,0,0.39315199851989746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,64,0,1,float16,fp8,0,0.4461119969685872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,128,1,float16,fp8,0,0.12557866175969443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,128,1,fp8,fp8,0,0.11777599652608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,float16,0,0.42121068636576336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,0,1,float16,fp8,0,0.42231468359629315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,float16,0,0.1272053321202596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,64,0,1,fp8,fp8,0,0.3661919832229614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,128,1,float16,fp8,0,0.12784533699353537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,float16,0,0.42183999220530194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,128,1,fp8,fp8,0,0.12062933047612508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,0,1,float16,fp8,0,0.42156799634297687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,float16,0,0.12849600116411844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,64,0,1,fp8,fp8,0,0.3698666493097941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,float16,0,0.42583998044331867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,128,1,float16,fp8,0,0.13007466991742453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,128,1,fp8,fp8,0,0.12613866726557413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,float16,0,0.1033066709836324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,0,1,float16,fp8,0,0.42634665966033936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,64,0,1,fp8,fp8,0,0.3781333367029826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,float16,0,0.27562665939331055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,128,1,float16,fp8,0,0.10315199693044026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,128,1,fp8,fp8,0,0.09756267070770264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,float16,0,0.1020906666914622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,0,1,float16,fp8,0,0.27534933884938556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,64,0,1,fp8,fp8,0,0.2451840043067932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,float16,0,0.27425599098205566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,128,1,float16,fp8,0,0.10340266426404317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,128,1,fp8,fp8,0,0.0981280008951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,0,1,float16,fp8,0,0.27353066205978394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,64,0,1,fp8,fp8,0,0.2424479921658834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,float16,0,0.10319466392199199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,float16,0,0.2743946711222331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,128,1,float16,fp8,0,0.10345066587130229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,128,1,fp8,fp8,0,0.09691199660301208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,0,1,float16,fp8,0,0.2738773425420125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,float16,0,0.10353599985440572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,64,0,1,fp8,fp8,0,0.2423093318939209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,float16,0,0.2733599940935771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,128,1,float16,fp8,0,0.10308266679445903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,128,1,fp8,fp8,0,0.09715732932090759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,0,1,float16,fp8,0,0.27534399429957074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,64,0,1,fp8,fp8,0,0.24232532580693564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,float16,0,0.6175466775894165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,128,1,float16,fp8,0,0.622655987739563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,128,1,fp8,fp8,0,0.5700746774673462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,float16,0,1.9001173973083496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,float16,0,0.6290773153305054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,0,1,float16,fp8,0,1.9082239468892415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,64,0,1,fp8,fp8,0,1.655461311340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,128,1,float16,fp8,0,0.6344053347905477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,128,1,fp8,fp8,0,0.5822879870732626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,float16,0,1.912549336751302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,float16,0,0.6450986862182617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,0,1,float16,fp8,0,1.9211947123209636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,64,0,1,fp8,fp8,0,1.6666560173034668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,128,1,float16,fp8,0,0.6522080103556315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,128,1,fp8,fp8,0,0.6035093466440836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,float16,0,0.36500267187754315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,float16,0,1.9343360265096028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,128,1,float16,fp8,0,0.37218133608500165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,0,1,float16,fp8,0,1.9420053164164226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,64,0,1,fp8,fp8,0,1.6898612976074219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,float16,0,1.036421298980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,128,1,fp8,fp8,0,0.34929601351420086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,float16,0,0.3224479953447978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,0,1,float16,fp8,0,1.0416266918182373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,64,0,1,fp8,fp8,0,0.913802703221639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,128,1,float16,fp8,0,0.3253013292948405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,float16,0,0.9808586438496908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,128,1,fp8,fp8,0,0.30018667380015057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,float16,0,0.32659733295440674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,0,1,float16,fp8,0,0.9842186768849691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,64,0,1,fp8,fp8,0,0.8605919679005941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,128,1,float16,fp8,0,0.3304533362388611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,float16,0,0.9861226876576742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,128,1,fp8,fp8,0,0.3043733239173889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,float16,0,0.334666649500529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,0,1,float16,fp8,0,0.9945173263549805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,64,0,1,fp8,fp8,0,0.8649066289265951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,128,1,float16,fp8,0,0.33894399801890057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,float16,0,0.9988266626993815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,128,1,fp8,fp8,0,0.314410666624705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,float16,0,0.19814932346343994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,float16,0,0.5543093283971151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,128,1,float16,fp8,0,0.2041119933128357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,0,1,float16,fp8,0,1.002410650253296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,64,0,1,fp8,fp8,0,0.8727146784464518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,128,1,fp8,fp8,0,0.19222400585810342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,float16,0,0.1735573410987854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,0,1,float16,fp8,0,0.5589226484298706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,64,0,1,fp8,fp8,0,0.493125319480896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,128,1,float16,fp8,0,0.17705066998799643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,128,1,fp8,fp8,0,0.16660799582799277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,float16,0,0.5234613418579102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,0,1,float16,fp8,0,0.5245866775512695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,float16,0,0.1770240068435669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,64,0,1,fp8,fp8,0,0.4638986587524414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,float16,0,0.5264213482538859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,128,1,float16,fp8,0,0.17933867375055948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,128,1,fp8,fp8,0,0.1686613361040751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,float16,0,0.18346132834752402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,0,1,float16,fp8,0,0.528170665105184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,64,0,1,fp8,fp8,0,0.46727999051411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,float16,0,0.5331253210703532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,128,1,float16,fp8,0,0.18727999925613403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,128,1,fp8,fp8,0,0.17488000790278116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,float16,0,0.11788800358772278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,float16,0,0.3128640055656433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,0,1,float16,fp8,0,0.5372960170110067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,64,0,1,fp8,fp8,0,0.47092799345652264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,128,1,float16,fp8,0,0.11969600121180217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,128,1,fp8,fp8,0,0.11551466584205627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,0,1,float16,fp8,0,0.3166240056355794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,64,0,1,fp8,fp8,0,0.28087466955184937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,float16,0,0.10359999537467957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,float16,0,0.2978559931119283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,128,1,float16,fp8,0,0.10522133111953735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,128,1,fp8,fp8,0,0.09701866904894511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,0,1,float16,fp8,0,0.29812800884246826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,float16,0,0.10540266831715901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,64,0,1,fp8,fp8,0,0.26001065969467163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,128,1,float16,fp8,0,0.10540266831715901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,float16,0,0.2997866670290629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,128,1,fp8,fp8,0,0.09688533345858256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,float16,0,0.10627200206120808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,0,1,float16,fp8,0,0.298144002755483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,64,0,1,fp8,fp8,0,0.26210665702819824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,float16,0,0.30001600583394367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,128,1,fp8,fp8,0,0.103301336367925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,128,1,float16,fp8,0,0.10891200105349223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,0,1,float16,fp8,0,0.30296534299850464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,float16,0,0.08302933474381764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,64,0,1,fp8,fp8,0,0.26709334055582684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,float16,0,0.20108266671498617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,128,1,float16,fp8,0,0.0845973292986552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,128,1,fp8,fp8,0,0.07869866490364075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,0,1,float16,fp8,0,0.20164267222086588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,64,0,1,fp8,fp8,0,0.1797599991162618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,float16,0,0.08549867073694865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,float16,0,0.20171733697255453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,128,1,float16,fp8,0,0.08443199594815572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,128,1,fp8,fp8,0,0.0814933329820633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,0,1,float16,fp8,0,0.20156800746917725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,64,0,1,fp8,fp8,0,0.17906665802001953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,float16,0,0.0846026639143626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,128,1,float16,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,float16,0,0.20150399208068848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,128,1,fp8,fp8,0,0.08029866715272267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,0,1,fp8,fp8,0,0.17909866571426392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,64,0,1,float16,fp8,0,0.20132799943288168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,float16,0,0.0848640004793803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,128,1,float16,fp8,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,float16,0,0.20152533054351807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,128,1,fp8,fp8,0,0.08113066852092743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,0,1,float16,fp8,0,0.20084800322850546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,64,0,1,fp8,fp8,0,0.17779199282328287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,float16,0,0.466261347134908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,128,1,float16,fp8,0,0.46836801369984943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,128,1,fp8,fp8,0,0.4273759921391805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,float16,0,1.1766613324483235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,float16,0,0.473962664604187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,0,1,float16,fp8,0,1.1809279918670654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,64,0,1,fp8,fp8,0,1.0281706651051838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,128,1,float16,fp8,0,0.4772533178329468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,128,1,fp8,fp8,0,0.4373493194580078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,float16,0,1.1863359610239665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,float16,0,0.48413864771525067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,0,1,float16,fp8,0,1.1921919981638591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,64,0,1,fp8,fp8,0,1.0399946371714275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,128,1,float16,fp8,0,0.48863999048868817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,128,1,fp8,fp8,0,0.45341865221659344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,float16,0,1.199178695678711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,float16,0,0.2795199950536092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,128,1,float16,fp8,0,0.2857919931411743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,0,1,fp8,fp8,0,1.0514559745788574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,64,0,1,float16,fp8,0,1.2090293566385906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,float16,0,0.6546880006790161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,128,1,fp8,fp8,0,0.2655679980913798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,float16,0,0.24591465791066489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,0,1,float16,fp8,0,0.6606133381525675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,64,0,1,fp8,fp8,0,0.5846933523813883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,float16,0,0.6135093371073405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,128,1,float16,fp8,0,0.24740799268086752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,128,1,fp8,fp8,0,0.2299733360608419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,float16,0,0.2488266626993815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,0,1,float16,fp8,0,0.6164799928665161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,64,0,1,fp8,fp8,0,0.542522668838501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,float16,0,0.6194026470184326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,128,1,fp8,fp8,0,0.2337013284365336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,128,1,float16,fp8,0,0.25203200181325275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,float16,0,0.2550879915555318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,0,1,float16,fp8,0,0.6213546593983968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,64,0,1,fp8,fp8,0,0.5482346614201864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,float16,0,0.6297493378321329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,128,1,float16,fp8,0,0.2584853370984395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,128,1,fp8,fp8,0,0.24155199527740479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,float16,0,0.15126400192578635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,0,1,float16,fp8,0,0.6317493518193563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,64,0,1,fp8,fp8,0,0.5558506647745768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,float16,0,0.3580426772435506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,128,1,float16,fp8,0,0.15677866339683533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,128,1,fp8,fp8,0,0.14860799908638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,float16,0,0.13194666306177774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,0,1,fp8,fp8,0,0.32159467538197833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,64,0,1,float16,fp8,0,0.3603573242823283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,float16,0,0.3309066692988078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,128,1,float16,fp8,0,0.13337600231170654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,128,1,fp8,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,0,1,float16,fp8,0,0.33478931585947674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,float16,0,0.13220799962679544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,64,0,1,fp8,fp8,0,0.2925013303756714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,float16,0,0.33267199993133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,128,1,float16,fp8,0,0.13598933815956116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,128,1,fp8,fp8,0,0.12788800398508707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,0,1,float16,fp8,0,0.3354346752166748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,float16,0,0.13849600156148276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,64,0,1,fp8,fp8,0,0.297818660736084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,float16,0,0.338373343149821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,128,1,float16,fp8,0,0.14038399855295816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,128,1,fp8,fp8,0,0.134442667166392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,float16,0,0.08685866991678874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,0,1,float16,fp8,0,0.34276266892751056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,64,0,1,fp8,fp8,0,0.3040213386217753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,float16,0,0.2063466707865397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,128,1,float16,fp8,0,0.08931733171145122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,128,1,fp8,fp8,0,0.08826667070388794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,0,1,float16,fp8,0,0.20680000384648642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,float16,0,0.07881600161393483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,64,0,1,fp8,fp8,0,0.18781334161758423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,float16,0,0.19683200120925903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,128,1,float16,fp8,0,0.07905066510041554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,128,1,fp8,fp8,0,0.0743146687746048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,0,1,float16,fp8,0,0.19709332784016928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,float16,0,0.07894399762153625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,64,0,1,fp8,fp8,0,0.1722559928894043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,float16,0,0.1974239945411682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,128,1,float16,fp8,0,0.08014933268229167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,128,1,fp8,fp8,0,0.0746613343556722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,0,1,fp8,fp8,0,0.17328532536824545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,64,0,1,float16,fp8,0,0.19711466630299887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,float16,0,0.0804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,128,1,float16,fp8,0,0.082805335521698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,128,1,fp8,fp8,0,0.07693333427111308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,float16,0,0.19761600097020468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,float16,0,0.06434666613737743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,0,1,fp8,fp8,0,0.1755146582921346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,64,0,1,float16,fp8,0,0.1997226675351461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,float16,0,0.14037866393725076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,128,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,128,1,fp8,fp8,0,0.0625493327776591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,0,1,float16,fp8,0,0.14065600434939066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,64,0,1,fp8,fp8,0,0.12681066989898682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,float16,0,0.06405866642793019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,float16,0,0.1420906682809194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,128,1,float16,fp8,0,0.06433066725730896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,128,1,fp8,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,0,1,float16,fp8,0,0.14206399520238241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,64,0,1,fp8,fp8,0,0.12609600027402243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,float16,0,0.06489066779613495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,128,1,float16,fp8,0,0.06608533362547557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,128,1,fp8,fp8,0,0.06243200103441874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,float16,0,0.1421013375123342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,float16,0,0.0662613312403361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,0,1,float16,fp8,0,0.14262400070826212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,64,0,1,fp8,fp8,0,0.12589333454767862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,128,1,float16,fp8,0,0.06485333542029063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,float16,0,0.14097066720326742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,128,1,fp8,fp8,0,0.06238399942715963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,0,1,fp8,fp8,0,0.12583999832471213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,64,0,1,float16,fp8,0,0.14204800128936768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,float16,0,0.6145066817601522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,128,1,float16,fp8,0,0.6192373434702555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,float16,0,1.2050560315450032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,128,1,fp8,fp8,0,0.5646026531855265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,0,1,float16,fp8,0,1.2134186426798503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,float16,0,0.627407987912496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,64,0,1,fp8,fp8,0,1.0612640380859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,128,1,float16,fp8,0,0.6329333384831747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,128,1,fp8,fp8,0,0.57860799630483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,float16,0,1.2244746685028076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,float16,0,0.646448016166687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,0,1,float16,fp8,0,1.2295573552449544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,64,0,1,fp8,fp8,0,1.076037327448527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,float16,0,1.2488640149434407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,128,1,float16,fp8,0,0.6502559979756674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,128,1,fp8,fp8,0,0.5992586612701416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,float16,0,0.3578346570332845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,0,1,float16,fp8,0,1.2476426760355632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,64,0,1,fp8,fp8,0,1.1005226771036785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,float16,0,0.6779253482818604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,128,1,float16,fp8,0,0.36482131481170654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,128,1,fp8,fp8,0,0.3409493366877238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,float16,0,0.3144693374633789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,0,1,fp8,fp8,0,0.6063946485519409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,64,0,1,float16,fp8,0,0.6846826871236166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,float16,0,0.6207573413848877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,128,1,float16,fp8,0,0.31672000885009766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,128,1,fp8,fp8,0,0.2918613354365031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,0,1,float16,fp8,0,0.6217973232269287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,64,0,1,fp8,fp8,0,0.5496213436126709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,float16,0,0.3196000059445699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,float16,0,0.6263146797815958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,128,1,float16,fp8,0,0.323578675587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,128,1,fp8,fp8,0,0.2982026735941569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,0,1,float16,fp8,0,0.6279946565628052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,float16,0,0.32792532444000244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,64,0,1,fp8,fp8,0,0.5567466815312704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,float16,0,0.6364479859670004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,128,1,float16,fp8,0,0.33077865839004517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,128,1,fp8,fp8,0,0.3062079946200053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,float16,0,0.1913493275642395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,0,1,float16,fp8,0,0.6417866547902426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,64,0,1,fp8,fp8,0,0.5658186674118042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,float16,0,0.3591146469116211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,128,1,float16,fp8,0,0.19555733601252237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,128,1,fp8,fp8,0,0.1840533415476481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,0,1,fp8,fp8,0,0.32438933849334717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,float16,0,0.1651946703592936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,64,0,1,float16,fp8,0,0.3639093240102132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,128,1,float16,fp8,0,0.16686934232711792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,float16,0,0.3264213403065999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,128,1,fp8,fp8,0,0.15862933794657388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,0,1,float16,fp8,0,0.3285333315531413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,float16,0,0.16777600844701132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,64,0,1,fp8,fp8,0,0.2941280007362366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,float16,0,0.32870932420094806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,128,1,float16,fp8,0,0.16995733976364136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,128,1,fp8,fp8,0,0.16085867087046304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,0,1,float16,fp8,0,0.3314666748046875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,float16,0,0.17524800697962442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,64,0,1,fp8,fp8,0,0.29688533147176105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,float16,0,0.337775985399882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,128,1,float16,fp8,0,0.17719467480977377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,128,1,fp8,fp8,0,0.16739734013875326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,0,1,float16,fp8,0,0.3396693468093872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,float16,0,0.10787199934323628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,float16,0,0.1998186707496643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,64,0,1,fp8,fp8,0,0.3053013285001119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,128,1,float16,fp8,0,0.11006933450698853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,128,1,fp8,fp8,0,0.1065120001633962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,0,1,float16,fp8,0,0.2019253373146057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,64,0,1,fp8,fp8,0,0.18322134017944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,float16,0,0.09301867087682088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,128,1,float16,fp8,0,0.09410132964452107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,float16,0,0.18348799149195352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,128,1,fp8,fp8,0,0.08706667025883992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,float16,0,0.09529067079226176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,0,1,float16,fp8,0,0.18352532386779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,64,0,1,fp8,fp8,0,0.16294399897257486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,128,1,float16,fp8,0,0.09538666407267253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,128,1,fp8,fp8,0,0.08898666501045227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,float16,0,0.1846933364868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,float16,0,0.09674666325251262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,0,1,float16,fp8,0,0.18564265966415405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,64,0,1,fp8,fp8,0,0.16498667001724243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,128,1,float16,fp8,0,0.09930666287740071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,128,1,fp8,fp8,0,0.09317866961161296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,float16,0,0.185808002948761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,float16,0,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,0,1,float16,fp8,0,0.18853867053985596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,64,0,1,fp8,fp8,0,0.16927999258041382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,float16,0,0.11782399813334148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,128,1,float16,fp8,0,0.06494933366775513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,128,1,fp8,fp8,0,0.06423999865849812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,0,1,float16,fp8,0,0.12145599722862244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,float16,0,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,64,0,1,fp8,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,128,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,float16,0,0.1158026655515035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,128,1,fp8,fp8,0,0.05558399856090546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,float16,0,0.05957333246866862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,0,1,fp8,fp8,0,0.10124799609184265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,64,0,1,float16,fp8,0,0.11563733220100403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,128,1,float16,fp8,0,0.060218666990598045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,128,1,fp8,fp8,0,0.05663466453552246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,float16,0,0.114138662815094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,0,1,float16,fp8,0,0.11617599924405415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,float16,0,0.06016000111897787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,64,0,1,fp8,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,128,1,float16,fp8,0,0.062234664956728615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,float16,0,0.1167039970556895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,128,1,fp8,fp8,0,0.05750933289527893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,0,1,float16,fp8,0,0.11562666296958923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,64,0,1,fp8,fp8,0,0.1032480001449585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,128,1,fp8,fp8,0,0.0496373325586319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,float16,0,0.08517866333325703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,128,1,float16,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,0,1,float16,fp8,0,0.08588266372680664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,64,0,1,fp8,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,float16,0,0.052111998200416565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,float16,0,0.08691733082135518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,128,1,float16,fp8,0,0.051856001218159996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,128,1,fp8,fp8,0,0.050101334849993386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,float16,0,0.05221333106358846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,0,1,float16,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,64,0,1,fp8,fp8,0,0.07866133252779643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,128,1,float16,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,128,1,fp8,fp8,0,0.050250664353370667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,float16,0,0.08706667025883992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,0,1,fp8,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,64,0,1,float16,fp8,0,0.08568533261617024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,float16,0,0.052255998055140175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,128,1,float16,fp8,0,0.05186133086681366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,float16,0,0.08673066894213359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,128,1,fp8,fp8,0,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,0,1,float16,fp8,0,0.08515200018882751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,64,0,1,fp8,fp8,0,0.07699733475844066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,float16,0,0.46297065416971844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,128,1,float16,fp8,0,0.4647093216578166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,128,1,fp8,fp8,0,0.42582400639851886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,float16,0,0.7777973016103109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,0,1,float16,fp8,0,0.7846666971842448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,64,0,1,fp8,fp8,0,0.6896639664967855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,float16,0,0.4705599943796794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,float16,0,0.7871039708455404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,128,1,float16,fp8,0,0.47396798928578693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,128,1,fp8,fp8,0,0.432805339495341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,0,1,float16,fp8,0,0.79202667872111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,float16,0,0.48285333315531415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,64,0,1,fp8,fp8,0,0.700656016667684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,float16,0,0.7999893029530843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,128,1,float16,fp8,0,0.48602132002512616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,128,1,fp8,fp8,0,0.4477386474609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,0,1,float16,fp8,0,0.8050933678944906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,float16,0,0.2726400097211202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,64,0,1,fp8,fp8,0,0.7147146860758463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,float16,0,0.4447946548461914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,128,1,float16,fp8,0,0.27747199932734173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,128,1,fp8,fp8,0,0.26179200410842896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,0,1,fp8,fp8,0,0.4030666748682658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,float16,0,0.23708800474802652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,64,0,1,float16,fp8,0,0.45003732045491535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,float16,0,0.404800017674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,128,1,float16,fp8,0,0.24053333202997842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,128,1,fp8,fp8,0,0.2224959929784139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,0,1,float16,fp8,0,0.406826655069987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,float16,0,0.24106132984161377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,64,0,1,fp8,fp8,0,0.3633973201115926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,float16,0,0.40650665760040283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,128,1,float16,fp8,0,0.24566932519276938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,128,1,fp8,fp8,0,0.22864532470703125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,0,1,float16,fp8,0,0.41069332758585614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,64,0,1,fp8,fp8,0,0.36925868193308514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,float16,0,0.2505439917246501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,128,1,float16,fp8,0,0.25405333439509076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,float16,0,0.4150826533635457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,128,1,fp8,fp8,0,0.23491199811299643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,float16,0,0.14643733700116476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,0,1,float16,fp8,0,0.4190933307011922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,64,0,1,fp8,fp8,0,0.37390931447347003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,128,1,float16,fp8,0,0.15009599924087524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,float16,0,0.2405280073483785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,128,1,fp8,fp8,0,0.14206399520238241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,0,1,float16,fp8,0,0.24304000536600748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,float16,0,0.12381333112716675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,64,0,1,fp8,fp8,0,0.2195146679878235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,128,1,float16,fp8,0,0.12584533294041952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,float16,0,0.21436800559361777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,128,1,fp8,fp8,0,0.11801066994667053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,0,1,float16,fp8,0,0.21764800945917764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,float16,0,0.12611732880274454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,64,0,1,fp8,fp8,0,0.19362666209538779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,128,1,float16,fp8,0,0.1286186675230662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,128,1,fp8,fp8,0,0.1223360002040863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,float16,0,0.21634133656819662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,0,1,float16,fp8,0,0.21843733390172324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,float16,0,0.13194132844607034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,64,0,1,fp8,fp8,0,0.19848533471425375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,128,1,float16,fp8,0,0.13412800431251526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,128,1,fp8,fp8,0,0.12940266728401184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,float16,0,0.22110400597254434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,0,1,fp8,fp8,0,0.2048906683921814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,64,0,1,float16,fp8,0,0.22393600145975748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,float16,0,0.13425599535306296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,128,1,float16,fp8,0,0.08321066697438557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,128,1,fp8,fp8,0,0.08121599753697713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,0,1,float16,fp8,0,0.13607466220855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,float16,0,0.0727040022611618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,64,0,1,fp8,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,float16,0,0.12609600027402243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,128,1,float16,fp8,0,0.07303466896216075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,128,1,fp8,fp8,0,0.06673599779605865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,float16,0,0.07261333366235097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,0,1,float16,fp8,0,0.1272479991118113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,64,0,1,fp8,fp8,0,0.1116480032602946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,128,1,float16,fp8,0,0.07241066793600719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,128,1,fp8,fp8,0,0.06864533325036366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,float16,0,0.12603200475374857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,0,1,float16,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,float16,0,0.07258666555086772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,64,0,1,fp8,fp8,0,0.11212799946467082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,float16,0,0.1279306709766388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,128,1,float16,fp8,0,0.07468266785144806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,128,1,fp8,fp8,0,0.07037333150704701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,float16,0,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,0,1,float16,fp8,0,0.12868799765904745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,64,0,1,fp8,fp8,0,0.1151146690050761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,float16,0,0.08527466654777527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,128,1,float16,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,128,1,fp8,fp8,0,0.04935466746489207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,0,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,64,0,1,fp8,fp8,0,0.07902400195598602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,float16,0,0.04790933430194855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,128,1,float16,fp8,0,0.047824000318845115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,float16,0,0.08386666576067607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,128,1,fp8,fp8,0,0.044213334719340004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,float16,0,0.046821330984433494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,0,1,float16,fp8,0,0.08444799979527791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,64,0,1,fp8,fp8,0,0.07433066765467326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,float16,0,0.08272000153859456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,128,1,float16,fp8,0,0.047653332352638245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,128,1,fp8,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,0,1,float16,fp8,0,0.08285333216190338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,64,0,1,fp8,fp8,0,0.07546666761239369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,float16,0,0.04808533191680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,128,1,float16,fp8,0,0.04770666857560476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,128,1,fp8,fp8,0,0.04619733492533366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,float16,0,0.08317866424719493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,0,1,float16,fp8,0,0.08473066488901775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,64,0,1,fp8,fp8,0,0.07551466425259908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,float16,0,0.040021332601706185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,float16,0,0.0602400004863739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,128,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,128,1,fp8,fp8,0,0.03755733370780945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,0,1,float16,fp8,0,0.06080533564090729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,64,0,1,fp8,fp8,0,0.05638400216897329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,float16,0,0.0603413333495458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,128,1,float16,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,0,1,float16,fp8,0,0.060533334811528526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,64,0,1,fp8,fp8,0,0.05402133365472158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,float16,0,0.03915199885765711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,float16,0,0.06048533320426941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,128,1,float16,fp8,0,0.039493332306543984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,128,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,0,1,float16,fp8,0,0.061477333307266235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,64,0,1,fp8,fp8,0,0.055386667450269066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,float16,0,0.06185600161552429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,128,1,float16,fp8,0,0.03925333420435587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,128,1,fp8,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,0,1,float16,fp8,0,0.060362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,64,0,1,fp8,fp8,0,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,float16,0,0.6470880111058553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,128,1,float16,fp8,0,0.6462773482004801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,float16,0,0.8923892974853516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,128,1,fp8,fp8,0,0.5822773377100626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,0,1,float16,fp8,0,0.8939200242360433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,64,0,1,fp8,fp8,0,0.7863360246022543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,float16,0,0.662448008855184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,128,1,float16,fp8,0,0.6613440116246542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,float16,0,0.9093226591746012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,128,1,fp8,fp8,0,0.5865600109100342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,0,1,float16,fp8,0,0.9074719746907552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,float16,0,0.6780213514963785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,64,0,1,fp8,fp8,0,0.7906186580657959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,128,1,fp8,fp8,0,0.6074986855189005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,128,1,float16,fp8,0,0.6752373377482096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,float16,0,0.9274133046468099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,float16,0,0.3692213296890259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,0,1,fp8,fp8,0,0.8128533363342285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,64,0,1,float16,fp8,0,0.9274933338165283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,float16,0,0.50327467918396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,128,1,fp8,fp8,0,0.34807467460632324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,128,1,float16,fp8,0,0.3703039884567261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,0,1,float16,fp8,0,0.5039199988047282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,64,0,1,fp8,fp8,0,0.45837334791819256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,float16,0,0.31997867425282794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,float16,0,0.4443146785100301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,128,1,float16,fp8,0,0.32119999329249066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,128,1,fp8,fp8,0,0.29416000843048096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,0,1,float16,fp8,0,0.4471999804178874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,64,0,1,fp8,fp8,0,0.39899734656016034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,float16,0,0.3238240083058675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,float16,0,0.45075734456380206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,128,1,float16,fp8,0,0.32687467336654663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,128,1,fp8,fp8,0,0.30010666449864704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,0,1,float16,fp8,0,0.45204798380533856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,64,0,1,fp8,fp8,0,0.4065706729888916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,float16,0,0.3352106809616089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,float16,0,0.46113598346710205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,128,1,fp8,fp8,0,0.31152000029881793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,128,1,float16,fp8,0,0.33745598793029785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,0,1,float16,fp8,0,0.465338667233785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,float16,0,0.1920213301976522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,64,0,1,fp8,fp8,0,0.4166400035222371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,float16,0,0.2627200086911519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,128,1,float16,fp8,0,0.1941386659940084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,128,1,fp8,fp8,0,0.18295466899871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,0,1,float16,fp8,0,0.2655093272527059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,64,0,1,fp8,fp8,0,0.24239999055862427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,float16,0,0.16273599863052368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,float16,0,0.22852800289789835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,128,1,float16,fp8,0,0.16453867157300314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,128,1,fp8,fp8,0,0.15543466806411743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,0,1,float16,fp8,0,0.23105599482854208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,64,0,1,fp8,fp8,0,0.21212265888849893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,float16,0,0.1651573379834493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,float16,0,0.23161067565282187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,128,1,float16,fp8,0,0.16711467504501343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,128,1,fp8,fp8,0,0.15878933668136597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,0,1,float16,fp8,0,0.23261332511901855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,64,0,1,fp8,fp8,0,0.2140000065167745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,float16,0,0.17450666427612305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,float16,0,0.2409013311068217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,128,1,float16,fp8,0,0.17495999733606973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,128,1,fp8,fp8,0,0.16421866416931152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,float16,0,0.10339732964833577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,0,1,float16,fp8,0,0.2429706652959188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,64,0,1,fp8,fp8,0,0.22212799390157065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,float16,0,0.14063466588656107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,128,1,float16,fp8,0,0.10531733433405559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,128,1,fp8,fp8,0,0.10335466265678406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,0,1,float16,fp8,0,0.14475733041763306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,64,0,1,fp8,fp8,0,0.13476799925168356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,float16,0,0.0890773336092631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,float16,0,0.12659733494122824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,128,1,float16,fp8,0,0.08992532889048259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,128,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,0,1,float16,fp8,0,0.12785599629084268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,64,0,1,fp8,fp8,0,0.11379200220108032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,float16,0,0.08900266885757446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,float16,0,0.1269706686337789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,128,1,float16,fp8,0,0.09087999661763509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,128,1,fp8,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,0,1,float16,fp8,0,0.12781866391499838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,64,0,1,fp8,fp8,0,0.11556800206502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,float16,0,0.09106666843096416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,float16,0,0.12805333733558655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,128,1,float16,fp8,0,0.09436800082524617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,128,1,fp8,fp8,0,0.08771199981371562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,0,1,float16,fp8,0,0.1318826675415039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,64,0,1,fp8,fp8,0,0.11946666240692139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,float16,0,0.058965335289637245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,float16,0,0.08074666559696198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,128,1,float16,fp8,0,0.06102933486302694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,128,1,fp8,fp8,0,0.05755733450253805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,0,1,float16,fp8,0,0.08497599760691325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,64,0,1,fp8,fp8,0,0.07691200077533722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,float16,0,0.05607999861240387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,float16,0,0.07868266602357228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,128,1,float16,fp8,0,0.05605866511662801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,128,1,fp8,fp8,0,0.05179200073083242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,0,1,float16,fp8,0,0.07877333462238312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,64,0,1,fp8,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,float16,0,0.05648533503214518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,float16,0,0.07875733574231465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,128,1,float16,fp8,0,0.05621333420276642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,128,1,fp8,fp8,0,0.052149335543314614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,0,1,fp8,fp8,0,0.07038400073846181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,64,0,1,float16,fp8,0,0.08046400050322215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,float16,0,0.05634133517742157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,float16,0,0.0788266658782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,128,1,float16,fp8,0,0.05797866483529409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,128,1,fp8,fp8,0,0.052629331747690834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,0,1,float16,fp8,0,0.08077333370844524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,64,0,1,fp8,fp8,0,0.07267199953397115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,float16,0,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,float16,0,0.057674666245778404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,128,1,float16,fp8,0,0.042175998290379844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,128,1,fp8,fp8,0,0.041093334555625916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,0,1,float16,fp8,0,0.06018666426340739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,64,0,1,fp8,fp8,0,0.052239999175071716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,float16,0,0.056405335664749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,128,1,float16,fp8,0,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,128,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,0,1,float16,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,64,0,1,fp8,fp8,0,0.05035733183224996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,float16,0,0.03990933299064636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,128,1,float16,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,float16,0,0.05547733108202616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,128,1,fp8,fp8,0,0.03851199895143509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,0,1,float16,fp8,0,0.05667200187842051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,64,0,1,fp8,fp8,0,0.049866666396458946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,float16,0,0.04141333450873693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,float16,0,0.056474665800730385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,128,1,float16,fp8,0,0.04192000130812327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,128,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,0,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,64,0,1,fp8,fp8,0,0.050517335534095764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,float16,0,0.04444799820582072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,128,1,float16,fp8,0,0.0334346666932106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,128,1,fp8,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,0,1,float16,fp8,0,0.046122665206591286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,64,0,1,fp8,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,float16,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,float16,0,0.044250667095184326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,128,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,128,1,fp8,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,0,1,float16,fp8,0,0.04563199977080027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,64,0,1,fp8,fp8,0,0.04190933207670847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,float16,0,0.03341866781314214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,float16,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,128,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,128,1,fp8,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,0,1,float16,fp8,0,0.04514666895071665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,64,0,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,float16,0,0.044922664761543274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,128,1,float16,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,128,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,0,1,float16,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,64,0,1,fp8,fp8,0,0.04171200096607208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,float16,0,0.45874667167663574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,128,1,float16,fp8,0,0.46295468012491864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,float16,0,0.5800000031789144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,128,1,fp8,fp8,0,0.42052801450093585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,0,1,fp8,fp8,0,0.5211040178934733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,64,0,1,float16,fp8,0,0.582757314046224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,float16,0,0.4740000168482463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,float16,0,0.5940746863683065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,128,1,fp8,fp8,0,0.4355893135070801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,128,1,float16,fp8,0,0.4782826503117879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,0,1,float16,fp8,0,0.5977173248926798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,64,0,1,fp8,fp8,0,0.5313599904378256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,float16,0,0.4850613276163737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,float16,0,0.6095519860585531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,128,1,float16,fp8,0,0.4872266848882039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,128,1,fp8,fp8,0,0.44520533084869385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,float16,0,0.27107733488082886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,0,1,float16,fp8,0,0.6117813189824423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,64,0,1,fp8,fp8,0,0.5476799805959066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,float16,0,0.3392000198364258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,128,1,float16,fp8,0,0.2779360016187032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,128,1,fp8,fp8,0,0.2623680035273234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,0,1,float16,fp8,0,0.34778134028116864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,64,0,1,fp8,fp8,0,0.3173759977022807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,float16,0,0.23462400833765665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,float16,0,0.2977760036786397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,128,1,float16,fp8,0,0.2379680077234904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,128,1,fp8,fp8,0,0.2211893399556478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,0,1,float16,fp8,0,0.3007253408432007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,64,0,1,fp8,fp8,0,0.2741919954617818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,float16,0,0.23907732963562012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,float16,0,0.30161599318186444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,128,1,float16,fp8,0,0.2421226700146993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,128,1,fp8,fp8,0,0.22830400864283243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,0,1,float16,fp8,0,0.3053920070330302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,64,0,1,fp8,fp8,0,0.2794666687647502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,float16,0,0.2486720085144043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,float16,0,0.31160000960032147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,128,1,float16,fp8,0,0.25252266724904376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,128,1,fp8,fp8,0,0.23254400491714478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,0,1,float16,fp8,0,0.3149333397547404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,float16,0,0.14459733168284097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,64,0,1,fp8,fp8,0,0.28577067454655963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,float16,0,0.1812373399734497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,128,1,float16,fp8,0,0.14894400040308634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,128,1,fp8,fp8,0,0.14094932874043783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,0,1,float16,fp8,0,0.18544532855351767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,64,0,1,fp8,fp8,0,0.17108800013860068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,float16,0,0.12185600399971008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,float16,0,0.15549866358439127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,128,1,fp8,fp8,0,0.11565867066383362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,128,1,float16,fp8,0,0.12403200070063274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,0,1,float16,fp8,0,0.15891733765602112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,64,0,1,fp8,fp8,0,0.14434132973353067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,float16,0,0.12390933434168498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,float16,0,0.15901866555213928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,128,1,float16,fp8,0,0.12648533781369528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,128,1,fp8,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,0,1,float16,fp8,0,0.16110933820406595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,64,0,1,fp8,fp8,0,0.14805866281191507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,float16,0,0.1281599998474121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,float16,0,0.1632373332977295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,128,1,float16,fp8,0,0.1316480040550232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,128,1,fp8,fp8,0,0.12602667013804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,0,1,float16,fp8,0,0.1663040022055308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,float16,0,0.07828266421953838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,64,0,1,fp8,fp8,0,0.1544533371925354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,float16,0,0.09854933619499207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,128,1,float16,fp8,0,0.0814879983663559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,128,1,fp8,fp8,0,0.0794239987929662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,0,1,float16,fp8,0,0.1016426682472229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,float16,0,0.06901866694291432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,64,0,1,fp8,fp8,0,0.09738666812578838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,float16,0,0.0912000040213267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,128,1,float16,fp8,0,0.070592001080513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,128,1,fp8,fp8,0,0.06474666794141133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,0,1,float16,fp8,0,0.09114133318265279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,64,0,1,fp8,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,float16,0,0.06890133519967397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,128,1,float16,fp8,0,0.07070399820804596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,float16,0,0.09078933795293172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,128,1,fp8,fp8,0,0.06595199803511302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,0,1,fp8,fp8,0,0.08246933420499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,64,0,1,float16,fp8,0,0.090938667456309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,float16,0,0.07083733379840851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,128,1,float16,fp8,0,0.07228266696135204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,float16,0,0.09128000338872273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,128,1,fp8,fp8,0,0.06865600248177846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,float16,0,0.04610666632652283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,0,1,float16,fp8,0,0.09385066231091817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,64,0,1,fp8,fp8,0,0.0846613347530365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,128,1,float16,fp8,0,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,128,1,fp8,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,float16,0,0.06237866481145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,float16,0,0.04369066655635834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,0,1,float16,fp8,0,0.06303999821345012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,float16,0,0.05901333192984263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,64,0,1,fp8,fp8,0,0.05927466849486033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,128,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,128,1,fp8,fp8,0,0.041893333196640015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,0,1,float16,fp8,0,0.05852800110975901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,float16,0,0.043578664461771645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,64,0,1,fp8,fp8,0,0.054661333560943604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,float16,0,0.05819199979305267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,128,1,float16,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,128,1,fp8,fp8,0,0.041477332512537636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,0,1,fp8,fp8,0,0.05256533126036326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,64,0,1,float16,fp8,0,0.060906668504079185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,float16,0,0.043951998154322304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,float16,0,0.05852800110975901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,128,1,float16,fp8,0,0.04562666515509287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,0,1,float16,fp8,0,0.061066667238871254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,128,1,fp8,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,64,0,1,fp8,fp8,0,0.05397333204746246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,float16,0,0.03944533318281174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,128,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,128,1,fp8,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,0,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,64,0,1,fp8,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,float16,0,0.03181866556406021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,float16,0,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,128,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,128,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,0,1,fp8,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,64,0,1,float16,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,float16,0,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,128,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,0,1,float16,fp8,0,0.03944533318281174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,128,1,fp8,fp8,0,0.029834667841593426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,64,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,float16,0,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,128,1,float16,fp8,0,0.0322026660044988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,128,1,fp8,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,0,1,float16,fp8,0,0.040181333820025124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,64,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,float16,0,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,128,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,128,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,0,1,float16,fp8,0,0.03828799972931544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,64,0,1,fp8,fp8,0,0.03496533383925756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,float16,0,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,128,1,float16,fp8,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,float16,0,0.038106667498747505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,128,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,0,1,float16,fp8,0,0.03804266701141993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,64,0,1,fp8,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,float16,0,0.03813866774241129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,128,1,float16,fp8,0,0.02991466720898946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,128,1,fp8,fp8,0,0.027141332626342773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,0,1,float16,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,64,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,128,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,128,1,fp8,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,0,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,64,0,1,fp8,fp8,0,0.03398400048414866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,float16,0,0.5341226657231649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,float16,0,0.607370654741923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,128,1,float16,fp8,0,0.5331519842147827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,128,1,fp8,fp8,0,0.4954506556193034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,0,1,float16,fp8,0,0.6079253355662028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,64,0,1,fp8,fp8,0,0.5550506512324015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,float16,0,0.5473920106887817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,128,1,float16,fp8,0,0.5460426807403564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,float16,0,0.6211733420689901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,128,1,fp8,fp8,0,0.5257066488265991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,0,1,fp8,fp8,0,0.5829813480377197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,64,0,1,float16,fp8,0,0.6201119820276896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,float16,0,0.5514933268229166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,128,1,float16,fp8,0,0.5464213291803995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,float16,0,0.6265920003255209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,128,1,fp8,fp8,0,0.5331199963887533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,float16,0,0.3020533323287964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,0,1,fp8,fp8,0,0.5910666783650717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,64,0,1,float16,fp8,0,0.6254613399505615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,128,1,float16,fp8,0,0.29685332377751666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,float16,0,0.34726933638254803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,128,1,fp8,fp8,0,0.29640533526738483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,float16,0,0.27540266513824463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,0,1,fp8,fp8,0,0.3307039936383565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,64,0,1,float16,fp8,0,0.34135464827219647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,float16,0,0.3155679901440938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,128,1,fp8,fp8,0,0.2579200069109599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,128,1,float16,fp8,0,0.27593066294987995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,0,1,float16,fp8,0,0.3144960006078084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,64,0,1,fp8,fp8,0,0.2884586652119954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,float16,0,0.2811253269513448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,float16,0,0.32100266218185425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,128,1,float16,fp8,0,0.2808906634648641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,128,1,fp8,fp8,0,0.2717706759770711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,0,1,float16,fp8,0,0.31909332672754925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,64,0,1,fp8,fp8,0,0.30394667387008667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,float16,0,0.2834773262341817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,float16,0,0.3245973388353984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,128,1,float16,fp8,0,0.2819146712621053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,128,1,fp8,fp8,0,0.2752693295478821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,0,1,float16,fp8,0,0.3227786620457967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,float16,0,0.1604159971078237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,64,0,1,fp8,fp8,0,0.3063253362973531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,float16,0,0.1851093371709188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,128,1,float16,fp8,0,0.15705600380897522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,128,1,fp8,fp8,0,0.15837867061297098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,0,1,float16,fp8,0,0.18184000253677368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,64,0,1,fp8,fp8,0,0.1766186753908793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,float16,0,0.14588266611099243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,float16,0,0.16740800937016806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,128,1,float16,fp8,0,0.14637333154678345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,128,1,fp8,fp8,0,0.13690132896105447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,0,1,float16,fp8,0,0.16665599743525186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,64,0,1,fp8,fp8,0,0.1540426711241404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,float16,0,0.14894400040308634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,float16,0,0.16941332817077637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,128,1,float16,fp8,0,0.14858667055765787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,128,1,fp8,fp8,0,0.14356799920399985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,0,1,float16,fp8,0,0.1696853240331014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,64,0,1,fp8,fp8,0,0.16004266341527304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,float16,0,0.15040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,float16,0,0.17138133446375528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,128,1,float16,fp8,0,0.15079466501871744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,128,1,fp8,fp8,0,0.14729600151379904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,0,1,float16,fp8,0,0.17116800944010416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,64,0,1,fp8,fp8,0,0.163290669520696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,float16,0,0.08929066856702168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,float16,0,0.10217066605885823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,128,1,float16,fp8,0,0.08917867143948872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,128,1,fp8,fp8,0,0.08993599812189738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,0,1,float16,fp8,0,0.10275733470916748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,64,0,1,fp8,fp8,0,0.09987733761469524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,float16,0,0.08291733264923096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,float16,0,0.09443199634552002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,128,1,float16,fp8,0,0.0821919987599055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,128,1,fp8,fp8,0,0.07492266595363617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,0,1,float16,fp8,0,0.09429867068926494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,float16,0,0.08247999846935272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,64,0,1,fp8,fp8,0,0.08528000116348267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,float16,0,0.09382399916648865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,128,1,float16,fp8,0,0.08262933293978374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,128,1,fp8,fp8,0,0.07679999868075053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,0,1,float16,fp8,0,0.09489066402117412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,64,0,1,fp8,fp8,0,0.0862559974193573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,float16,0,0.083514670530955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,float16,0,0.0946720043818156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,128,1,float16,fp8,0,0.0831520011027654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,128,1,fp8,fp8,0,0.07885866860548656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,0,1,float16,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,float16,0,0.052154665191968284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,64,0,1,fp8,fp8,0,0.0902400016784668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,float16,0,0.060405333836873375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,128,1,float16,fp8,0,0.05256533126036326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,128,1,fp8,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,0,1,float16,fp8,0,0.060405333836873375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,float16,0,0.050111999114354454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,64,0,1,fp8,fp8,0,0.05699733396371206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,128,1,float16,fp8,0,0.04974400003751119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,128,1,fp8,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,0,1,float16,fp8,0,0.058287998040517174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,64,0,1,fp8,fp8,0,0.05253866811593374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,float16,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,float16,0,0.057722667853037514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,128,1,float16,fp8,0,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,128,1,fp8,fp8,0,0.04781866570313772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,0,1,fp8,fp8,0,0.05324266850948334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,64,0,1,float16,fp8,0,0.05795200169086456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,float16,0,0.04994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,128,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,128,1,float16,fp8,0,0.05011733373006185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,0,1,float16,fp8,0,0.05899199843406677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,float16,0,0.03597866743803024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,64,0,1,fp8,fp8,0,0.05423999826113383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,float16,0,0.04144000013669332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,128,1,float16,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,0,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,64,0,1,fp8,fp8,0,0.039850667119026184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,float16,0,0.03440533330043157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,128,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,128,1,fp8,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,0,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,64,0,1,fp8,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,float16,0,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,float16,0,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,128,1,float16,fp8,0,0.03659199923276901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,128,1,fp8,fp8,0,0.03467733412981033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,0,1,float16,fp8,0,0.041834667325019836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,64,0,1,fp8,fp8,0,0.03874133278926214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,float16,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,float16,0,0.04011200120051702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,128,1,float16,fp8,0,0.03647999962170919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,128,1,fp8,fp8,0,0.03465600063403448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,0,1,float16,fp8,0,0.04067199925581614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,float16,0,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,64,0,1,fp8,fp8,0,0.03849600007136663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,float16,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,128,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,128,1,fp8,fp8,0,0.025797332326571148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,64,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,float16,0,0.025653332471847534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,128,1,float16,fp8,0,0.02489600082238515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,128,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,0,1,float16,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,64,0,1,fp8,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,128,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,float16,0,0.030394665896892548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,float16,0,0.026474667092164356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,64,0,1,fp8,fp8,0,0.028336000939210255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,128,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,0,1,float16,fp8,0,0.029445332785447437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,64,0,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,128,1,float16,fp8,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,0,1,float16,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,64,128,1,fp8,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,float16,0,0.021594665944576263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,128,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,0,1,float16,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,64,0,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,128,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,128,1,fp8,fp8,0,0.020367999871571858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,64,0,1,fp8,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,float16,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,float16,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,128,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,128,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,64,0,1,fp8,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,128,1,float16,float16,0,0.5204426844914755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,0,1,float16,float16,0,0.5188266833623251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,128,1,float16,fp8,0,0.5169333219528198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,128,1,fp8,fp8,0,0.4803893168767293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,0,1,fp8,fp8,0,0.4736800193786621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,64,0,1,float16,fp8,0,0.5192426840464274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,128,1,float16,float16,0,0.531989336013794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,0,1,float16,float16,0,0.5317920049031576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,128,1,float16,fp8,0,0.5277973413467407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,0,1,fp8,fp8,0,0.5061013301213583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,128,1,fp8,fp8,0,0.5078186591466268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,64,0,1,float16,fp8,0,0.5300639867782593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,128,1,float16,float16,0,0.5336746772130331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,128,1,float16,fp8,0,0.5303680102030436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,0,1,float16,float16,0,0.5381973187128702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,128,1,fp8,fp8,0,0.5088640054066976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,0,1,fp8,fp8,0,0.5116693178812662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,64,0,1,float16,fp8,0,0.5328213373819987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,128,1,float16,float16,0,0.29427733023961383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,0,1,float16,float16,0,0.29662400484085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,128,1,fp8,fp8,0,0.2839253346125285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,128,1,float16,fp8,0,0.2883146603902181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,0,1,float16,fp8,0,0.2923733393351237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,64,0,1,fp8,fp8,0,0.28435200452804565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,128,1,float16,float16,0,0.26710933446884155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,0,1,float16,float16,0,0.26969067255655926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,128,1,float16,fp8,0,0.26864532629648846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,128,1,fp8,fp8,0,0.24830400943756104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,0,1,float16,fp8,0,0.2670133312543233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,64,0,1,fp8,fp8,0,0.24646933873494467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,128,1,float16,float16,0,0.27483199040095013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,0,1,float16,float16,0,0.27374933163324994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,128,1,float16,fp8,0,0.2731146613756816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,128,1,fp8,fp8,0,0.26307199398676556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,0,1,float16,fp8,0,0.2738879919052124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,64,0,1,fp8,fp8,0,0.2613333264986674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,128,1,float16,float16,0,0.2749119997024536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,0,1,float16,float16,0,0.27595200141270954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,128,1,float16,fp8,0,0.27407999833424884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,128,1,fp8,fp8,0,0.2630133430163066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,0,1,float16,fp8,0,0.2749333381652832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,64,0,1,fp8,fp8,0,0.26314133405685425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,128,1,float16,float16,0,0.15521066387494406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,0,1,float16,float16,0,0.1575146714846293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,128,1,float16,fp8,0,0.15321600437164307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,128,1,fp8,fp8,0,0.1527466674645742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,0,1,float16,fp8,0,0.15520000457763672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,64,0,1,fp8,fp8,0,0.1525226632754008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,128,1,float16,float16,0,0.14176000157992044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,0,1,float16,float16,0,0.14340800046920776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,128,1,float16,fp8,0,0.14223466316858926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,128,1,fp8,fp8,0,0.13128532965977988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,0,1,float16,fp8,0,0.1421280006567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,64,0,1,fp8,fp8,0,0.1316480040550232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,128,1,float16,float16,0,0.14497066537539163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,0,1,float16,float16,0,0.1448906660079956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,128,1,float16,fp8,0,0.1439786652723948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,128,1,fp8,fp8,0,0.13848533233006796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,0,1,float16,fp8,0,0.14482667048772177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,128,1,float16,float16,0,0.1460533340771993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,64,0,1,fp8,fp8,0,0.13754666845003763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,0,1,float16,float16,0,0.1474026640256246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,128,1,float16,fp8,0,0.14627200365066528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,128,1,fp8,fp8,0,0.1425440013408661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,0,1,float16,fp8,0,0.1462399959564209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,64,0,1,fp8,fp8,0,0.14121599992116293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,128,1,float16,float16,0,0.08830933769543965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,0,1,float16,float16,0,0.08763200044631958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,128,1,float16,fp8,0,0.08754666646321614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,128,1,fp8,fp8,0,0.08736000458399455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,0,1,float16,fp8,0,0.0862613320350647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,64,0,1,fp8,fp8,0,0.08673066894213359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,128,1,float16,float16,0,0.08057066798210144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,0,1,float16,float16,0,0.08057066798210144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,128,1,float16,fp8,0,0.08004799981911977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,128,1,fp8,fp8,0,0.07369066774845123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,0,1,float16,fp8,0,0.08074666559696198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,64,0,1,fp8,fp8,0,0.07296533385912578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,128,1,float16,float16,0,0.08072000245253245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,0,1,float16,float16,0,0.07954666515191396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,128,1,float16,fp8,0,0.08117866516113281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,128,1,fp8,fp8,0,0.07603199779987335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,0,1,float16,fp8,0,0.08001066744327545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,64,0,1,fp8,fp8,0,0.07315200070540111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,128,1,float16,float16,0,0.08088000118732452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,0,1,float16,float16,0,0.08105066418647766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,128,1,float16,fp8,0,0.08188800017038982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,128,1,fp8,fp8,0,0.07795733213424683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,0,1,float16,fp8,0,0.08170666793982188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,64,0,1,fp8,fp8,0,0.07771199941635132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,128,1,float16,float16,0,0.050213331977526345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,0,1,float16,float16,0,0.0513973335425059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,128,1,float16,fp8,0,0.05017599960168203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,128,1,fp8,fp8,0,0.04896533489227295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,0,1,float16,fp8,0,0.05189333359400431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,64,0,1,fp8,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,128,1,float16,float16,0,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,0,1,float16,float16,0,0.05008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,128,1,float16,fp8,0,0.04804266492525736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,128,1,fp8,fp8,0,0.04688533147176107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,0,1,float16,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,64,0,1,fp8,fp8,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,0,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,128,1,float16,float16,0,0.0499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,128,1,float16,fp8,0,0.050010666251182556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,128,1,fp8,fp8,0,0.04799466828505198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,0,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,64,0,1,fp8,fp8,0,0.046709333856900535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,128,1,float16,float16,0,0.049733335773150124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,0,1,float16,float16,0,0.05016533533732096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,128,1,float16,fp8,0,0.04977599779764811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,128,1,fp8,fp8,0,0.04693333307902018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,0,1,float16,fp8,0,0.049135997891426086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,64,0,1,fp8,fp8,0,0.04755199948946635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,128,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,0,1,float16,float16,0,0.03587199995915095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,128,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,128,1,fp8,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,0,1,float16,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,64,0,1,fp8,fp8,0,0.03498133271932602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,128,1,float16,float16,0,0.03531199942032496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,0,1,float16,float16,0,0.033946665624777474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,128,1,float16,fp8,0,0.03386666625738144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,128,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,0,1,float16,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,64,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,128,1,float16,float16,0,0.03585600107908249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,0,1,float16,float16,0,0.03399466723203659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,128,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,128,1,fp8,fp8,0,0.03446933378775915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,0,1,float16,fp8,0,0.03428266694148382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,64,0,1,fp8,fp8,0,0.03339733431736628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,128,1,float16,float16,0,0.034373333056767784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,0,1,float16,float16,0,0.035989334185918175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,128,1,float16,fp8,0,0.03492266684770584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,0,1,float16,fp8,0,0.03604800005753835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,64,0,1,fp8,fp8,0,0.0317493329445521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,128,1,float16,float16,0,0.02590399980545044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,0,1,float16,float16,0,0.026026666164398193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,128,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,64,0,1,fp8,fp8,0,0.02555199960867564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,128,1,float16,float16,0,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,128,1,float16,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,128,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,0,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,0,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,0,1,float16,float16,0,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,128,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,128,1,fp8,fp8,0,0.024890666206677754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,0,1,float16,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,64,0,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,0,1,float16,float16,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,128,1,float16,fp8,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,0,1,float16,fp8,0,0.026714667677879333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,64,0,1,fp8,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,128,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,0,1,float16,float16,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,128,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,0,1,float16,fp8,0,0.02163733293612798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,64,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,128,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,0,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,128,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,0,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,64,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,128,1,float16,float16,0,0.020842666427294414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,128,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,0,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,64,0,1,fp8,fp8,0,0.020576000213623047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,0,1,float16,float16,0,0.0220320001244545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,128,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,0,1,float16,fp8,0,0.022346665461858112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,64,0,1,fp8,fp8,0,0.020080000162124634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,128,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,128,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,128,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,0,1,float16,float16,0,0.021695998807748158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,0,1,float16,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,128,1,float16,float16,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,0,1,float16,float16,0,0.020207999895016353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,128,1,fp8,fp8,0,0.02004266654451688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,64,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,128,1,float16,float16,0,0.021717332303524017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,128,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,0,1,fp8,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,64,128,1,fp8,fp8,0,0.019733333339293797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,128,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,128,1,float16,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,128,1,fp8,fp8,0,0.02014933278163274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,64,0,1,fp8,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,128,1,float16,float16,0,0.2518826723098755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,0,1,float16,float16,0,0.24758400519688925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,128,1,float16,fp8,0,0.25336533784866333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,128,1,fp8,fp8,0,0.23175466060638428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,0,1,float16,fp8,0,0.2471839984258016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,64,0,1,fp8,fp8,0,0.22687999407450357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,128,1,float16,float16,0,0.2590133349100749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,0,1,float16,float16,0,0.2542346715927124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,128,1,float16,fp8,0,0.2582079966862996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,128,1,fp8,fp8,0,0.24705066283543906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,0,1,float16,fp8,0,0.2509760061899821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,64,0,1,fp8,fp8,0,0.2399253249168396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,128,1,float16,float16,0,0.2605813344319661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,0,1,float16,float16,0,0.25459200143814087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,128,1,float16,fp8,0,0.2581920027732849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,128,1,fp8,fp8,0,0.24986666440963745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,0,1,float16,fp8,0,0.2523519992828369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,64,0,1,fp8,fp8,0,0.24228266874949136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,128,1,float16,float16,0,0.14643733700116476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,0,1,float16,float16,0,0.14405866463979086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,128,1,float16,fp8,0,0.14525866508483887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,128,1,fp8,fp8,0,0.14484266440073648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,0,1,float16,fp8,0,0.14315733313560486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,64,0,1,fp8,fp8,0,0.14138133327166238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,128,1,float16,float16,0,0.13553067048390707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,0,1,float16,float16,0,0.13225066661834717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,128,1,float16,fp8,0,0.13618666927019754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,0,1,float16,fp8,0,0.13152000308036804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,0,1,fp8,fp8,0,0.1200373371442159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,64,128,1,fp8,fp8,0,0.1244053343931834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,128,1,float16,float16,0,0.1384213368097941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,0,1,float16,float16,0,0.1344319979349772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,128,1,float16,fp8,0,0.13731732964515686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,128,1,fp8,fp8,0,0.13108799854914346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,0,1,float16,fp8,0,0.1344213287035624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,64,0,1,fp8,fp8,0,0.12577600280443826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,128,1,float16,float16,0,0.13924800356229147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,0,1,float16,float16,0,0.136272003253301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,128,1,float16,fp8,0,0.13757866621017456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,128,1,fp8,fp8,0,0.13406933347384134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,0,1,float16,fp8,0,0.13592533270517984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,128,1,float16,float16,0,0.08372267087300618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,0,1,float16,float16,0,0.0811359981695811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,128,1,float16,fp8,0,0.0830080012480418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,64,0,1,fp8,fp8,0,0.13146133224169412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,128,1,fp8,fp8,0,0.08457066615422566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,0,1,float16,fp8,0,0.07977066437403361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,64,0,1,fp8,fp8,0,0.08145600060621898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,128,1,float16,float16,0,0.07713599999745686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,0,1,float16,float16,0,0.07503466804822286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,128,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,128,1,fp8,fp8,0,0.0710399995247523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,0,1,float16,fp8,0,0.07518933216730754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,64,0,1,fp8,fp8,0,0.06818666557470958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,128,1,float16,float16,0,0.0777759999036789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,0,1,float16,float16,0,0.07500799993673961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,128,1,float16,fp8,0,0.07717333237330119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,128,1,fp8,fp8,0,0.07158400118350983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,0,1,float16,fp8,0,0.07531733314196269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,64,0,1,fp8,fp8,0,0.07055466870466869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,128,1,float16,float16,0,0.07681066791216533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,0,1,float16,float16,0,0.07558933397134145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,128,1,float16,fp8,0,0.07685866455237071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,128,1,fp8,fp8,0,0.0751093327999115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,0,1,float16,fp8,0,0.07613333563009898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,64,0,1,fp8,fp8,0,0.07102400064468384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,128,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,0,1,float16,float16,0,0.046309332052866616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,128,1,float16,fp8,0,0.048063998421033226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,128,1,fp8,fp8,0,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,0,1,float16,fp8,0,0.047279998660087585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,64,0,1,fp8,fp8,0,0.04420800010363261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,128,1,float16,float16,0,0.04597333570321401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,0,1,float16,float16,0,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,128,1,float16,fp8,0,0.04673066735267639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,128,1,fp8,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,0,1,float16,fp8,0,0.04590400060017904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,128,1,float16,float16,0,0.045978665351867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,0,1,float16,float16,0,0.045408000548680626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,128,1,float16,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,128,1,fp8,fp8,0,0.04397333165009817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,0,1,float16,fp8,0,0.04469866553942362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,64,0,1,fp8,fp8,0,0.043381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,128,1,float16,float16,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,0,1,float16,float16,0,0.04433600107828776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,128,1,float16,fp8,0,0.0476746658484141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,128,1,fp8,fp8,0,0.044400001565615334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,0,1,float16,fp8,0,0.04426133135954539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,64,0,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,128,1,float16,float16,0,0.03331733246644338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,0,1,float16,float16,0,0.03262399882078171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,128,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,128,1,fp8,fp8,0,0.03315199911594391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,0,1,float16,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,128,1,float16,float16,0,0.033887999753157295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,64,0,1,fp8,fp8,0,0.032405334214369454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,0,1,float16,float16,0,0.03203733265399933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,128,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,128,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,0,1,float16,fp8,0,0.03277866790692011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,64,0,1,fp8,fp8,0,0.03199466566244761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,128,1,float16,float16,0,0.034517332911491394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,0,1,float16,float16,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,128,1,float16,fp8,0,0.03432533393303553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,128,1,fp8,fp8,0,0.03331733246644338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,0,1,float16,fp8,0,0.03198933353026708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,64,0,1,fp8,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,128,1,float16,float16,0,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,0,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,128,1,float16,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,128,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,64,0,1,fp8,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,128,1,float16,float16,0,0.02492266645034154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,0,1,float16,float16,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,128,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,128,1,fp8,fp8,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,0,1,float16,fp8,0,0.02459733436505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,64,0,1,fp8,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,128,1,float16,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,0,1,float16,float16,0,0.023669332265853882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,128,1,fp8,fp8,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,64,0,1,fp8,fp8,0,0.02258133391539256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,128,1,float16,float16,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,128,1,float16,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,128,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,128,1,float16,float16,0,0.02369600037733714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,128,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,0,1,float16,fp8,0,0.024714666108290356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,64,0,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,128,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,0,1,float16,fp8,0,0.01987733319401741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,128,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,0,1,float16,float16,0,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,128,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,128,1,float16,float16,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,128,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,128,1,fp8,fp8,0,0.01956266661485036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,64,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,128,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,0,1,float16,fp8,0,0.020810666183630627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,128,1,float16,float16,0,0.01966933285196622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,0,1,float16,float16,0,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,128,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,128,1,float16,float16,0,0.018613333503405254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,128,1,float16,fp8,0,0.019893333315849304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,64,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,0,1,float16,float16,0,0.018053332964579265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,128,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,0,1,float16,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,64,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,128,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,128,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,64,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,128,1,float16,float16,0,0.018458666900793713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,128,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,128,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,0,1,float16,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,128,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,0,1,float16,float16,0,0.0179626668492953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,128,1,float16,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,128,1,fp8,fp8,0,0.018160000443458557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,128,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,128,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,0,1,float16,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,64,0,1,fp8,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,128,1,float16,float16,0,0.01786133274435997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,0,1,float16,float16,0,0.017616000026464462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,128,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,64,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,128,1,float16,float16,0,0.13828800121943155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,0,1,float16,float16,0,0.13936000068982443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,128,1,float16,fp8,0,0.1383519967397054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,128,1,fp8,fp8,0,0.12970133622487387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,0,1,float16,fp8,0,0.1384160021940867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,64,0,1,fp8,fp8,0,0.13038933277130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,128,1,float16,float16,0,0.14166399836540222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,128,1,float16,fp8,0,0.14148267110188803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,0,1,float16,float16,0,0.14220266540845236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,128,1,fp8,fp8,0,0.1365120013554891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,0,1,float16,fp8,0,0.14215999841690063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,64,0,1,fp8,fp8,0,0.1362773378690084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,128,1,float16,float16,0,0.14175466696421304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,0,1,float16,float16,0,0.14333867033322653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,128,1,float16,fp8,0,0.14249066511789957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,128,1,fp8,fp8,0,0.13986666997273764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,0,1,float16,fp8,0,0.14225600163141885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,64,0,1,fp8,fp8,0,0.14032000303268433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,128,1,float16,float16,0,0.08471999565760295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,0,1,float16,float16,0,0.08411199847857158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,128,1,float16,fp8,0,0.08318399886290233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,128,1,fp8,fp8,0,0.08531733353932698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,0,1,float16,fp8,0,0.08277333279450734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,64,0,1,fp8,fp8,0,0.08515733480453491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,128,1,float16,float16,0,0.07799999912579854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,0,1,float16,float16,0,0.07704000174999237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,128,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,128,1,fp8,fp8,0,0.0722453345855077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,0,1,float16,fp8,0,0.07701333363850911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,64,0,1,fp8,fp8,0,0.07318399846553802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,0,1,float16,float16,0,0.07679466903209686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,128,1,float16,float16,0,0.07810133198897044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,128,1,float16,fp8,0,0.07879466811815898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,128,1,fp8,fp8,0,0.07428800066312154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,0,1,float16,fp8,0,0.07682133217652638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,128,1,float16,float16,0,0.0786293347676595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,0,1,float16,float16,0,0.07975466549396515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,64,0,1,fp8,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,128,1,float16,fp8,0,0.0783786674340566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,128,1,fp8,fp8,0,0.07655466596285503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,128,1,float16,float16,0,0.04959466556708018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,0,1,float16,fp8,0,0.07926400005817413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,64,0,1,fp8,fp8,0,0.07737066845099132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,0,1,float16,float16,0,0.04877333343029022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,128,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,128,1,fp8,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,0,1,float16,fp8,0,0.048250665267308555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,64,0,1,fp8,fp8,0,0.04833066463470459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,128,1,float16,float16,0,0.04613333443800608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,0,1,float16,float16,0,0.046181331078211464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,128,1,float16,fp8,0,0.04655466477076212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,128,1,fp8,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,0,1,float16,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,64,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,128,1,float16,float16,0,0.048026666045188904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,0,1,float16,float16,0,0.04827733337879181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,128,1,float16,fp8,0,0.04571199913819631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,128,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,0,1,float16,fp8,0,0.048010667165120445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,64,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,128,1,float16,float16,0,0.047877331574757896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,0,1,float16,float16,0,0.047007997830708824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,128,1,float16,fp8,0,0.047914668917655945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,128,1,fp8,fp8,0,0.04513066510359446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,0,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,0,1,float16,float16,0,0.03179199993610382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,128,1,float16,float16,0,0.032229334115982056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,64,0,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,128,1,float16,fp8,0,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,128,1,fp8,fp8,0,0.031957333286603294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,0,1,float16,fp8,0,0.031898667414983116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,64,0,1,fp8,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,128,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,0,1,float16,float16,0,0.030970667799313862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,128,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,128,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,64,0,1,fp8,fp8,0,0.030063999195893604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,128,1,float16,float16,0,0.03165333221356074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,0,1,float16,float16,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,128,1,float16,fp8,0,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,128,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,64,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,128,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,0,1,float16,float16,0,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,128,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,0,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,64,0,1,fp8,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,128,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,128,1,float16,fp8,0,0.024527999262015026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,128,1,fp8,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,0,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,64,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,0,1,float16,float16,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,128,1,fp8,fp8,0,0.024858665963013966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,128,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,0,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,64,0,1,fp8,fp8,0,0.024298667907714844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,0,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,128,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,128,1,fp8,fp8,0,0.023786666492621105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,0,1,float16,fp8,0,0.025040000677108765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,64,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,128,1,float16,float16,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,0,1,float16,float16,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,128,1,float16,fp8,0,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,128,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,0,1,float16,fp8,0,0.0245919997493426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,64,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,128,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,128,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,128,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,128,1,float16,fp8,0,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,64,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,128,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,128,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,0,1,float16,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,64,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,128,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,128,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,0,1,float16,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,128,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,128,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,0,1,fp8,fp8,0,0.016149333367745083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,64,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,128,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,128,1,float16,float16,0,0.01584533353646596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,0,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,64,128,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,0,1,float16,float16,0,0.016554666062196095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,128,1,float16,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,128,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,64,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,128,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,128,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,128,1,float16,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,128,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,128,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,64,0,1,fp8,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,128,1,fp8,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,128,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,128,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,128,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,128,1,float16,float16,0,0.09801600376764934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,0,1,float16,float16,0,0.09789866209030151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,128,1,float16,fp8,0,0.09760533769925435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,128,1,fp8,fp8,0,0.09194667140642802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,0,1,float16,fp8,0,0.09703466296195984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,64,0,1,fp8,fp8,0,0.09136533737182617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,128,1,float16,float16,0,0.09874133268992107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,0,1,float16,float16,0,0.09729066491127014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,128,1,float16,fp8,0,0.09853333234786987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,128,1,fp8,fp8,0,0.09107200304667155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,0,1,float16,fp8,0,0.09769599636395772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,64,0,1,fp8,fp8,0,0.0923520028591156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,128,1,float16,float16,0,0.0977226694424947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,0,1,float16,float16,0,0.0993386705716451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,128,1,float16,fp8,0,0.09935466448465984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,128,1,fp8,fp8,0,0.09424533446629842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,0,1,float16,fp8,0,0.09909866253534953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,128,1,float16,float16,0,0.058149332801500954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,64,0,1,fp8,fp8,0,0.09400000174840291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,0,1,float16,float16,0,0.058490668733914696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,128,1,float16,fp8,0,0.05855466425418854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,128,1,fp8,fp8,0,0.05816533168156942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,0,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,64,0,1,fp8,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,128,1,float16,float16,0,0.05640000104904175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,0,1,float16,float16,0,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,128,1,float16,fp8,0,0.05637866755326589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,0,1,float16,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,0,1,fp8,fp8,0,0.05384533107280731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,64,128,1,fp8,fp8,0,0.05531733234723409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,128,1,float16,float16,0,0.05653866628805796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,0,1,float16,float16,0,0.05755199988683065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,128,1,float16,fp8,0,0.05633600056171417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,0,1,float16,fp8,0,0.05787733197212219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,128,1,fp8,fp8,0,0.05519466598828634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,64,0,1,fp8,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,128,1,float16,float16,0,0.056405335664749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,0,1,float16,float16,0,0.057850668827692665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,128,1,float16,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,128,1,fp8,fp8,0,0.054560000697771706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,0,1,float16,fp8,0,0.05644799768924713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,64,0,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,128,1,float16,float16,0,0.03803733239571253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,128,1,float16,fp8,0,0.03759466608365377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,0,1,float16,float16,0,0.03807466725508372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,128,1,fp8,fp8,0,0.03749333322048187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,0,1,float16,fp8,0,0.038191998998324074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,64,0,1,fp8,fp8,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,128,1,float16,float16,0,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,0,1,float16,float16,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,128,1,float16,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,128,1,fp8,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,0,1,float16,fp8,0,0.035962666074434914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,64,0,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,128,1,float16,float16,0,0.03664000084002813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,0,1,float16,float16,0,0.03758399933576584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,128,1,float16,fp8,0,0.036133334040641785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,128,1,fp8,fp8,0,0.036464000741640724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,64,0,1,float16,fp8,0,0.037802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,128,1,float16,float16,0,0.0374293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,0,1,float16,float16,0,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,128,1,float16,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,0,1,float16,fp8,0,0.03730666637420654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,128,1,fp8,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,64,0,1,fp8,fp8,0,0.03607466568549474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,128,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,0,1,float16,float16,0,0.02586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,128,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,128,1,fp8,fp8,0,0.027263998985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,0,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,64,0,1,fp8,fp8,0,0.025589334468046825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,128,1,float16,float16,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,0,1,float16,float16,0,0.026901334524154663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,128,1,float16,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,0,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,64,0,1,fp8,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,128,1,float16,float16,0,0.025920001169045765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,0,1,float16,float16,0,0.02624533325433731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,128,1,float16,fp8,0,0.02604266752799352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,128,1,fp8,fp8,0,0.025706666211287182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,64,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,0,1,float16,float16,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,128,1,float16,fp8,0,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,128,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,0,1,float16,fp8,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,64,0,1,fp8,fp8,0,0.025333332518736523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,128,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,128,1,float16,float16,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,64,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,0,1,float16,float16,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,128,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,0,1,float16,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,64,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,128,1,float16,float16,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,0,1,float16,float16,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,128,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,64,0,1,fp8,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,128,1,float16,float16,0,0.019679999599854153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,128,1,float16,fp8,0,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,128,1,fp8,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,0,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,64,0,1,fp8,fp8,0,0.019600000232458115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,0,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,128,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,128,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,128,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,0,1,fp8,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,128,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,0,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,128,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,128,1,float16,float16,0,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,64,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,128,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,128,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,64,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,128,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,128,1,fp8,fp8,0,0.016197333733240765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,0,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,64,0,1,fp8,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,128,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,64,0,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,128,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,128,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,128,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,128,1,fp8,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,64,0,1,fp8,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,64,0,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,128,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,64,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,128,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,128,1,float16,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,0,1,float16,float16,0,0.07658666869004567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,128,1,float16,float16,0,0.07890133559703827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,128,1,float16,fp8,0,0.07825600107510884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,128,1,fp8,fp8,0,0.07251733541488647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,0,1,float16,fp8,0,0.07864533364772797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,64,0,1,fp8,fp8,0,0.07387733459472656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,128,1,float16,float16,0,0.07863999903202057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,0,1,float16,float16,0,0.07796800136566162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,128,1,float16,fp8,0,0.07826133569081624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,128,1,fp8,fp8,0,0.07468266785144806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,0,1,float16,fp8,0,0.07755733529726665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,64,0,1,fp8,fp8,0,0.07494399944941203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,128,1,float16,float16,0,0.07649600009123485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,0,1,float16,float16,0,0.07817066709200542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,128,1,float16,fp8,0,0.07659199833869934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,128,1,fp8,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,0,1,float16,fp8,0,0.0777759999036789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,64,0,1,fp8,fp8,0,0.07275199890136719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,128,1,float16,float16,0,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,0,1,float16,float16,0,0.04882133503754934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,128,1,float16,fp8,0,0.04827733337879181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,128,1,fp8,fp8,0,0.04613866905371348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,0,1,float16,fp8,0,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,64,0,1,fp8,fp8,0,0.04790933430194855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,128,1,float16,float16,0,0.04756799836953481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,128,1,float16,fp8,0,0.047413334250450134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,0,1,float16,float16,0,0.0466186652580897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,128,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,0,1,float16,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,64,0,1,fp8,fp8,0,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,0,1,float16,float16,0,0.047770669062932335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,128,1,float16,fp8,0,0.04610666632652283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,128,1,float16,float16,0,0.04624533156553904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,128,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,0,1,float16,fp8,0,0.04614933331807455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,64,0,1,fp8,fp8,0,0.04531733194986979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,128,1,float16,float16,0,0.04604800045490265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,0,1,float16,float16,0,0.047882666190465294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,128,1,float16,fp8,0,0.0473280002673467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,128,1,fp8,fp8,0,0.0454773356517156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,0,1,float16,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,64,0,1,fp8,fp8,0,0.04529599845409393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,128,1,float16,float16,0,0.03197866678237915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,0,1,float16,float16,0,0.03311466674009959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,128,1,float16,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,128,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,0,1,float16,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,64,0,1,fp8,fp8,0,0.03200000027815501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,128,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,0,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,128,1,float16,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,128,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,0,1,float16,fp8,0,0.03189333279927572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,128,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,64,0,1,fp8,fp8,0,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,0,1,float16,float16,0,0.031445334355036415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,128,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,128,1,fp8,fp8,0,0.029717333614826202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,64,0,1,fp8,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,0,1,float16,float16,0,0.03180799881617228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,128,1,float16,fp8,0,0.032885332902272545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,128,1,fp8,fp8,0,0.03143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,0,1,float16,fp8,0,0.033157333731651306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,64,0,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,128,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,0,1,float16,float16,0,0.021546666820844013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,64,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,128,1,float16,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,0,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,128,1,fp8,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,64,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,128,1,float16,float16,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,128,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,0,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,64,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,128,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,0,1,float16,float16,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,128,1,float16,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,128,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,0,1,float16,fp8,0,0.021781332790851593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,64,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,128,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,128,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,128,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,64,0,1,fp8,fp8,0,0.018181333939234417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,128,1,float16,float16,0,0.017562666287024815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,0,1,float16,float16,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,128,1,float16,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,128,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,128,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,128,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,0,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,64,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,128,1,fp8,fp8,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,0,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,64,0,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,128,1,fp8,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,64,0,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,128,1,float16,fp8,0,0.01580799991885821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,128,1,float16,fp8,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,64,0,1,fp8,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,128,1,fp8,fp8,0,0.015717333803574245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,128,1,float16,float16,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,128,1,fp8,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,0,1,float16,fp8,0,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,128,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,128,1,fp8,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,64,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,0,1,float16,float16,0,0.01580799991885821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,128,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,0,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,64,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,128,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,128,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,128,1,fp8,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,64,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,128,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,64,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,128,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,128,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,128,1,float16,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,128,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,128,1,float16,float16,0,0.07044800122578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,0,1,float16,float16,0,0.06863466898600261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,128,1,float16,fp8,0,0.06832000116507213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,128,1,fp8,fp8,0,0.06698666512966156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,0,1,float16,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,64,0,1,fp8,fp8,0,0.06663466493288676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,128,1,float16,float16,0,0.0697866678237915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,0,1,float16,float16,0,0.06861866513888042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,128,1,float16,fp8,0,0.07008533179759979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,128,1,fp8,fp8,0,0.06619733572006226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,0,1,float16,fp8,0,0.07012266914049785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,64,0,1,fp8,fp8,0,0.06664533416430156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,128,1,float16,float16,0,0.06994133194287618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,0,1,float16,float16,0,0.07062933345635732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,128,1,float16,fp8,0,0.07051733136177063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,128,1,fp8,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,0,1,float16,fp8,0,0.07057600220044453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,64,0,1,fp8,fp8,0,0.06482666730880737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,128,1,float16,float16,0,0.04430399835109711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,0,1,float16,float16,0,0.043807998299598694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,128,1,float16,fp8,0,0.0439626673857371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,128,1,fp8,fp8,0,0.04178133110205332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,0,1,fp8,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,128,1,float16,float16,0,0.0421013335386912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,64,0,1,float16,fp8,0,0.043935999274253845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,0,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,128,1,float16,fp8,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,128,1,fp8,fp8,0,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,0,1,float16,fp8,0,0.04250133534272512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,64,0,1,fp8,fp8,0,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,128,1,float16,float16,0,0.04340266684691111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,0,1,float16,float16,0,0.042080000042915344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,128,1,fp8,fp8,0,0.041936000188191734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,128,1,float16,fp8,0,0.04221866528193156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,64,0,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,128,1,float16,float16,0,0.04213866591453552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,0,1,float16,float16,0,0.04204266766707102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,128,1,float16,fp8,0,0.0431573341290156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,128,1,fp8,fp8,0,0.04195199906826019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,0,1,float16,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,64,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,128,1,float16,float16,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,128,1,float16,fp8,0,0.028858666618665058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,0,1,float16,fp8,0,0.027813332776228588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,128,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,64,0,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,128,1,float16,float16,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,128,1,float16,fp8,0,0.028538666665554047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,128,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,128,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,64,0,1,fp8,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,128,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,0,1,float16,fp8,0,0.02886933336655299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,128,1,fp8,fp8,0,0.026869334280490875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,64,0,1,fp8,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,128,1,float16,float16,0,0.027888000011444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,128,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,128,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,0,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,64,0,1,fp8,fp8,0,0.026943999032179516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,128,1,fp8,fp8,0,0.020576000213623047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,64,0,1,fp8,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,128,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,0,1,float16,float16,0,0.021642667551835377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,128,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,128,1,float16,float16,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,128,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,64,0,1,fp8,fp8,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,128,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,0,1,float16,float16,0,0.022874665757020313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,128,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,128,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,0,1,float16,fp8,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,64,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,128,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,128,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,0,1,float16,fp8,0,0.017530667285124462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,128,1,float16,float16,0,0.018794666975736618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,128,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,128,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,128,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,128,1,float16,float16,0,0.016458666572968166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,64,0,1,float16,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,128,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,64,0,1,fp8,fp8,0,0.016544000556071598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,128,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,128,1,fp8,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,64,0,1,fp8,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,128,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,128,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,128,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,64,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,128,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,128,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,64,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,128,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,128,1,fp8,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,128,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,128,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,0,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,64,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,0,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,0,1,float16,float16,0,0.06027733286221822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,128,1,float16,float16,0,0.06033066908518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,128,1,float16,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,128,1,fp8,fp8,0,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,0,1,float16,fp8,0,0.06036800146102905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,64,0,1,fp8,fp8,0,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,128,1,float16,float16,0,0.06057066718737284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,0,1,float16,float16,0,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,128,1,float16,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,128,1,fp8,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,0,1,float16,fp8,0,0.06033066908518473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,64,0,1,fp8,fp8,0,0.05644266804059347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,128,1,float16,float16,0,0.06028266747792562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,0,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,128,1,float16,fp8,0,0.06256533165772755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,0,1,float16,fp8,0,0.060234665870666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,128,1,fp8,fp8,0,0.056757330894470215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,128,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,64,0,1,fp8,fp8,0,0.05610666672388712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,128,1,float16,fp8,0,0.03745066622893015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,64,0,1,fp8,fp8,0,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,128,1,float16,float16,0,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,128,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,0,1,float16,float16,0,0.03801066676775614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,128,1,fp8,fp8,0,0.03603200117746989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,0,1,float16,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,64,0,1,fp8,fp8,0,0.03486400097608566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,128,1,float16,float16,0,0.03775999943415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,0,1,float16,float16,0,0.03751999884843826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,128,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,128,1,fp8,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,0,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,128,1,float16,float16,0,0.0378506655494372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,64,0,1,fp8,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,0,1,float16,float16,0,0.03797333439191183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,128,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,128,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,0,1,float16,fp8,0,0.038058665891488395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,128,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,64,0,1,fp8,fp8,0,0.035461333890755974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,128,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,0,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,128,1,float16,float16,0,0.025797332326571148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,64,0,1,fp8,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,128,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,128,1,fp8,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,64,0,1,fp8,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,128,1,float16,float16,0,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,128,1,float16,fp8,0,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,128,1,fp8,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,0,1,float16,fp8,0,0.029093332588672638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,64,0,1,fp8,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,128,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,0,1,float16,float16,0,0.025781333446502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,128,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,128,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,0,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,128,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,0,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,128,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,128,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,64,0,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,128,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,128,1,fp8,fp8,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,0,1,float16,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,64,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,0,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,128,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,128,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,128,1,fp8,fp8,0,0.02025066688656807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,0,1,float16,fp8,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,64,0,1,fp8,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,128,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,0,0.01798933371901512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,64,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,0,1,float16,float16,0,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,128,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,128,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,64,0,1,fp8,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,128,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,128,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,64,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,128,1,float16,float16,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,0,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,0,0.017935999979575474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,64,0,1,fp8,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,64,0,1,fp8,fp8,0,0.016000000139077503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,128,1,fp8,fp8,0,0.016517333686351776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,128,1,float16,fp8,0,0.01786133274435997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,64,0,1,fp8,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,0,1,float16,float16,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,0,1,float16,fp8,0,0.01860800012946129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,64,0,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,128,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,128,1,float16,float16,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,64,0,1,fp8,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,128,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,128,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,128,1,float16,float16,0,0.015791999797026317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,0,1,float16,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,128,1,fp8,fp8,0,0.015610666324694952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,0,1,float16,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,64,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,128,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,128,1,float16,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,0,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,128,1,float16,float16,0,0.014725333700577417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,128,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,float16,0,0.4155199925104777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,128,1,float16,fp8,0,0.4192053476969401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,128,1,fp8,fp8,0,0.3943519989649455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,float16,0,2.198512077331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,float16,0,0.42764798800150555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,128,1,float16,fp8,0,0.4310773213704427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,0,1,float16,fp8,0,2.2135094006856284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,64,0,1,fp8,fp8,0,1.9124213854471843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,128,1,fp8,fp8,0,0.40833067893981934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,float16,0,2.21941868464152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,float16,0,0.4416266679763794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,128,1,float16,fp8,0,0.44737064838409424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,0,1,float16,fp8,0,2.2274667421976724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,64,0,1,fp8,fp8,0,1.9267466862996419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,128,1,fp8,fp8,0,0.42533334096272785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,float16,0,2.236975987752279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,float16,0,0.2531733314196269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,0,1,float16,fp8,0,2.2420639991760254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,64,0,1,fp8,fp8,0,1.9459840456644695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,128,1,float16,fp8,0,0.25914667050043744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,float16,0,1.1943573156992595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,128,1,fp8,fp8,0,0.24869332710901895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,float16,0,0.22436267137527466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,0,1,float16,fp8,0,1.1993707021077473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,64,0,1,fp8,fp8,0,1.042954683303833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,128,1,float16,fp8,0,0.2265440026919047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,128,1,fp8,fp8,0,0.21581333875656128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,float16,0,1.1596533457438152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,float16,0,0.22907733917236328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,0,1,fp8,fp8,0,1.0072960058848064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,64,0,1,float16,fp8,0,1.160970687866211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,128,1,float16,fp8,0,0.23276267449061075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,128,1,fp8,fp8,0,0.2227733333905538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,float16,0,1.164410670598348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,float16,0,0.2368053396542867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,128,1,float16,fp8,0,0.24054932594299316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,0,1,float16,fp8,0,1.163541316986084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,64,0,1,fp8,fp8,0,1.0154133637746174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,128,1,fp8,fp8,0,0.23030932744344076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,float16,0,1.1742773056030273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,float16,0,0.14645333091417947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,0,1,fp8,fp8,0,1.0243466695149739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,64,0,1,float16,fp8,0,1.1783253351847331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,128,1,float16,fp8,0,0.14941333731015524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,float16,0,0.6579413414001465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,128,1,fp8,fp8,0,0.14460266629854837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,float16,0,0.12803733348846436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,0,1,fp8,fp8,0,0.5814293225606283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,64,0,1,float16,fp8,0,0.6605120102564493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,128,1,float16,fp8,0,0.12782933314641318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,128,1,fp8,fp8,0,0.12406933307647705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,float16,0,0.6349226633707682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,0,1,float16,fp8,0,0.6343466838200887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,float16,0,0.12970133622487387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,64,0,1,fp8,fp8,0,0.5546826521555582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,128,1,float16,fp8,0,0.13005333145459494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,128,1,fp8,fp8,0,0.12784533699353537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,float16,0,0.6382826566696167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,float16,0,0.13641066352526346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,0,1,float16,fp8,0,0.6409386793772379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,64,0,1,fp8,fp8,0,0.559333324432373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,float16,0,0.6437866687774658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,128,1,float16,fp8,0,0.1381706694761912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,128,1,fp8,fp8,0,0.13436266779899597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,float16,0,0.10645332932472229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,float16,0,0.40052799383799237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,0,1,float16,fp8,0,0.6454293330510458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,64,0,1,fp8,fp8,0,0.5689119895299276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,128,1,float16,fp8,0,0.10553600390752156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,128,1,fp8,fp8,0,0.10310399532318115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,float16,0,0.10520533720652263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,0,1,float16,fp8,0,0.39997867743174237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,64,0,1,fp8,fp8,0,0.35517334938049316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,128,1,float16,fp8,0,0.10549867153167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,128,1,fp8,fp8,0,0.10153599580128987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,float16,0,0.39931201934814453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,0,1,float16,fp8,0,0.3985439936319987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,float16,0,0.10515200098355611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,64,0,1,fp8,fp8,0,0.3499679962793986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,float16,0,0.3999520142873128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,128,1,float16,fp8,0,0.10528000195821126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,128,1,fp8,fp8,0,0.10105066498120625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,float16,0,0.10496000448862712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,0,1,float16,fp8,0,0.39907201131184894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,64,0,1,fp8,fp8,0,0.35094932715098065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,float16,0,0.39768532911936444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,128,1,float16,fp8,0,0.10514666636784871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,128,1,fp8,fp8,0,0.10132267077763875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,0,1,float16,fp8,0,0.3996640046437581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,64,0,1,fp8,fp8,0,0.34886399904886883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,float16,0,0.3163093328475952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,128,1,float16,fp8,0,0.3186346689860026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,128,1,fp8,fp8,0,0.30008000135421753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,float16,0,1.327781359354655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,float16,0,0.324127991994222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,0,1,float16,fp8,0,1.3281439940134685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,128,1,float16,fp8,0,0.32763199011484784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,64,0,1,fp8,fp8,0,1.1587039629618328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,128,1,fp8,fp8,0,0.3097653388977051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,float16,0,1.3335839907328289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,float16,0,0.33298667271931964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,0,1,fp8,fp8,0,1.168837308883667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,64,0,1,float16,fp8,0,1.3396053314208984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,128,1,float16,fp8,0,0.33874666690826416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,128,1,fp8,fp8,0,0.324127991994222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,float16,0,1.3500266075134277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,float16,0,0.19506667057673135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,0,1,fp8,fp8,0,1.180560032526652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,64,0,1,float16,fp8,0,1.3534773190816243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,128,1,float16,fp8,0,0.199072003364563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,float16,0,0.7341866493225098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,128,1,fp8,fp8,0,0.1918933391571045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,float16,0,0.17070933183034262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,0,1,fp8,fp8,0,0.6502346595128378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,64,0,1,float16,fp8,0,0.7385919888814291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,128,1,float16,fp8,0,0.17323732376098633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,128,1,fp8,fp8,0,0.16707199811935425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,float16,0,0.7051786581675211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,float16,0,0.1751306653022766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,0,1,fp8,fp8,0,0.6215146780014038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,64,0,1,float16,fp8,0,0.7094026406606039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,128,1,float16,fp8,0,0.1770346760749817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,128,1,fp8,fp8,0,0.17111466328303018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,float16,0,0.7094346682230631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,float16,0,0.1834933360417684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,0,1,float16,fp8,0,0.7099786599477133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,64,0,1,fp8,fp8,0,0.6273706754048666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,128,1,float16,fp8,0,0.18588266770044962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,128,1,fp8,fp8,0,0.177839994430542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,float16,0,0.720682700475057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,float16,0,0.114656001329422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,0,1,fp8,fp8,0,0.6348426739374796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,64,0,1,float16,fp8,0,0.7202560106913248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,128,1,fp8,fp8,0,0.11520000298817952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,128,1,float16,fp8,0,0.11607999602953593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,float16,0,0.41489601135253906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,float16,0,0.10165866216023763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,0,1,fp8,fp8,0,0.3697333335876465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,64,0,1,float16,fp8,0,0.4158773422241211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,128,1,fp8,fp8,0,0.09699733058611552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,128,1,float16,fp8,0,0.10126933455467224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,float16,0,0.39821334679921466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,float16,0,0.10190932949384053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,0,1,fp8,fp8,0,0.34782934188842773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,64,0,1,float16,fp8,0,0.3986613353093465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,128,1,fp8,fp8,0,0.09909333785374959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,128,1,float16,fp8,0,0.10354133447011311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,float16,0,0.398911992708842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,float16,0,0.10605333248774211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,0,1,fp8,fp8,0,0.34924264748891193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,64,0,1,float16,fp8,0,0.40019198258717853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,float16,0,0.4022879997889201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,128,1,float16,fp8,0,0.10757866501808167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,128,1,fp8,fp8,0,0.10564266641934712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,float16,0,0.08297599852085114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,0,1,float16,fp8,0,0.4036693175633748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,64,0,1,fp8,fp8,0,0.3596213261286418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,float16,0,0.26119999090830487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,128,1,float16,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,128,1,fp8,fp8,0,0.07857066889603932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,0,1,float16,fp8,0,0.26359466711680096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,float16,0,0.08282666901747386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,64,0,1,fp8,fp8,0,0.23056532939275107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,float16,0,0.2601333260536194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,128,1,float16,fp8,0,0.08302400012811025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,128,1,fp8,fp8,0,0.07878399888674419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,0,1,float16,fp8,0,0.2611413399378459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,float16,0,0.08256533245245616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,64,0,1,fp8,fp8,0,0.2302239934603373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,128,1,float16,fp8,0,0.08258666594823201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,float16,0,0.2611626585324605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,128,1,fp8,fp8,0,0.07891733447710673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,0,1,float16,fp8,0,0.2610293428103129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,float16,0,0.08328533172607422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,64,0,1,fp8,fp8,0,0.2294666568438212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,float16,0,0.26131200790405273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,128,1,float16,fp8,0,0.0827893316745758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,128,1,fp8,fp8,0,0.08062933385372162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,0,1,float16,fp8,0,0.2616853316624959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,64,0,1,fp8,fp8,0,0.2321280042330424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,float16,0,0.26183466116587323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,128,1,float16,fp8,0,0.2649173339207967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,128,1,fp8,fp8,0,0.25221333901087445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,float16,0,0.9712639649709066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,float16,0,0.26893866062164307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,0,1,float16,fp8,0,0.9715999762217203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,64,0,1,fp8,fp8,0,0.8484000364939371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,128,1,float16,fp8,0,0.2717866698900859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,128,1,fp8,fp8,0,0.258735994497935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,float16,0,0.9795946280161539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,float16,0,0.2770773371060689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,0,1,float16,fp8,0,0.977674643198649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,64,0,1,fp8,fp8,0,0.8579839865366617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,128,1,float16,fp8,0,0.28357867399851483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,128,1,fp8,fp8,0,0.269381324450175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,float16,0,0.9871359666188558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,float16,0,0.16313067078590393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,0,1,float16,fp8,0,0.9938666820526123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,float16,0,0.5454560120900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,64,0,1,fp8,fp8,0,0.8678879737854004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,128,1,float16,fp8,0,0.16702399651209512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,128,1,fp8,fp8,0,0.16300266981124878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,float16,0,0.1421226660410563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,0,1,fp8,fp8,0,0.4845386743545532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,64,0,1,float16,fp8,0,0.5504213174184164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,128,1,float16,fp8,0,0.14480533202489218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,float16,0,0.5193813244501749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,128,1,fp8,fp8,0,0.13858667016029358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,0,1,float16,fp8,0,0.5216106573740641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,64,0,1,fp8,fp8,0,0.4613120158513387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,float16,0,0.14654399951299033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,float16,0,0.5233493248621622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,128,1,fp8,fp8,0,0.14492266376813254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,128,1,float16,fp8,0,0.14820266763369241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,0,1,float16,fp8,0,0.5270080169041952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,float16,0,0.15261866648991904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,64,0,1,fp8,fp8,0,0.4641546805699666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,128,1,float16,fp8,0,0.15481066703796387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,128,1,fp8,fp8,0,0.1507306694984436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,float16,0,0.5316053231557211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,float16,0,0.09564266602198283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,0,1,float16,fp8,0,0.5355999867121378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,64,0,1,fp8,fp8,0,0.47067201137542725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,128,1,float16,fp8,0,0.09929600358009338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,float16,0,0.310698668162028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,128,1,fp8,fp8,0,0.09904000163078308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,float16,0,0.08754666646321614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,0,1,float16,fp8,0,0.31407467524210614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,64,0,1,fp8,fp8,0,0.28035734097162884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,128,1,float16,fp8,0,0.08917333682378133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,128,1,fp8,fp8,0,0.08480532964070638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,float16,0,0.2995679974555969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,float16,0,0.08712533116340637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,0,1,fp8,fp8,0,0.2632319927215576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,64,0,1,float16,fp8,0,0.30185600121816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,128,1,float16,fp8,0,0.08881066242853801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,128,1,fp8,fp8,0,0.0849226713180542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,float16,0,0.30003732442855835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,float16,0,0.08896000186602275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,0,1,fp8,fp8,0,0.2643413345019023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,64,0,1,float16,fp8,0,0.30238399902979535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,128,1,float16,fp8,0,0.09134399890899658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,128,1,fp8,fp8,0,0.08921066919962566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,float16,0,0.30290667215983075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,float16,0,0.07439466814200084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,0,1,float16,fp8,0,0.30427199602127075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,64,0,1,fp8,fp8,0,0.2696320017178853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,float16,0,0.20459733406702676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,128,1,float16,fp8,0,0.07468266785144806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,128,1,fp8,fp8,0,0.07066666583220164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,float16,0,0.07481599847475688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,0,1,float16,fp8,0,0.2058453361193339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,64,0,1,fp8,fp8,0,0.18284799655278525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,128,1,float16,fp8,0,0.07438399891058604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,128,1,fp8,fp8,0,0.07092800239721934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,float16,0,0.2056480050086975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,0,1,float16,fp8,0,0.20563733577728271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,float16,0,0.07267199953397115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,64,0,1,fp8,fp8,0,0.18125865856806436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,128,1,float16,fp8,0,0.0730506678422292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,128,1,fp8,fp8,0,0.07061866422494252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,float16,0,0.20643732945124307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,0,1,float16,fp8,0,0.20676799615224203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,float16,0,0.07446933289368947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,64,0,1,fp8,fp8,0,0.18119466304779053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,float16,0,0.20570667584737143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,128,1,float16,fp8,0,0.07479999959468842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,128,1,fp8,fp8,0,0.07063466807206471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,0,1,float16,fp8,0,0.20682666699091592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,64,0,1,fp8,fp8,0,0.18120533227920532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,float16,0,0.4063466787338257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,128,1,float16,fp8,0,0.4102933406829834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,128,1,fp8,fp8,0,0.3840746482213338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,float16,0,1.2771893342336018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,float16,0,0.4175306558609009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,0,1,fp8,fp8,0,1.117136001586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,64,0,1,float16,fp8,0,1.2764960130055745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,128,1,float16,fp8,0,0.42127466201782227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,128,1,fp8,fp8,0,0.4002186854680379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,float16,0,1.2866613070170085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,0,1,fp8,fp8,0,1.1287893454233806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,float16,0,0.433354655901591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,64,0,1,float16,fp8,0,1.2915466626485188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,128,1,float16,fp8,0,0.4363306760787964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,128,1,fp8,fp8,0,0.4163680076599121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,float16,0,1.3075253168741863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,float16,0,0.24263467391331991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,0,1,float16,fp8,0,1.310821294784546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,128,1,float16,fp8,0,0.24898666143417358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,float16,0,0.7013920148213705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,64,0,1,fp8,fp8,0,1.1476373672485352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,128,1,fp8,fp8,0,0.23760000864664713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,float16,0,0.21181867520014444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,0,1,float16,fp8,0,0.7076906363169352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,64,0,1,fp8,fp8,0,0.6246773401896158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,128,1,float16,fp8,0,0.21496532360712686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,float16,0,0.6655146678288778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,128,1,fp8,fp8,0,0.20561067263285318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,float16,0,0.2180746595064799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,0,1,float16,fp8,0,0.6697173118591309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,64,0,1,fp8,fp8,0,0.5871093273162842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,128,1,float16,fp8,0,0.22017600138982138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,float16,0,0.6720426877339681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,128,1,fp8,fp8,0,0.2118026614189148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,float16,0,0.22670400142669678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,0,1,float16,fp8,0,0.6739947001139323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,64,0,1,fp8,fp8,0,0.5948426723480225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,float16,0,0.6828853289286295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,128,1,float16,fp8,0,0.23086933294932047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,128,1,fp8,fp8,0,0.22075732549031576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,float16,0,0.13408000270525613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,0,1,float16,fp8,0,0.6864159901936849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,float16,0,0.3826560179392497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,128,1,float16,fp8,0,0.13667200009028116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,64,0,1,fp8,fp8,0,0.6071893374125162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,128,1,fp8,fp8,0,0.13195733229319254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,0,1,float16,fp8,0,0.38764798641204834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,64,0,1,fp8,fp8,0,0.34330133597056073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,float16,0,0.11347200473149617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,128,1,float16,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,128,1,fp8,fp8,0,0.10958400368690491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,float16,0,0.3618933359781901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,float16,0,0.11626666784286499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,0,1,float16,fp8,0,0.3635573387145996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,64,0,1,fp8,fp8,0,0.32018133004506427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,128,1,float16,fp8,0,0.11931733290354411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,128,1,fp8,fp8,0,0.11575466394424438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,float16,0,0.3654133478800456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,0,1,fp8,fp8,0,0.3239946762720744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,float16,0,0.12191999951998393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,64,0,1,float16,fp8,0,0.3659466505050659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,128,1,float16,fp8,0,0.12387733658154805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,128,1,fp8,fp8,0,0.1223360002040863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,float16,0,0.37188267707824707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,float16,0,0.07861333092053731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,0,1,fp8,fp8,0,0.3333386580149333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,64,0,1,float16,fp8,0,0.37373868624369305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,128,1,fp8,fp8,0,0.08066666622956593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,128,1,float16,fp8,0,0.08077866832415263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,float16,0,0.22241065899531046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,float16,0,0.07259733478228252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,0,1,float16,fp8,0,0.22406399250030518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,64,0,1,fp8,fp8,0,0.20353599389394125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,128,1,float16,fp8,0,0.07262399792671204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,128,1,fp8,fp8,0,0.07035733262697856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,float16,0,0.21681066354115805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,float16,0,0.0732586681842804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,0,1,float16,fp8,0,0.21592533588409424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,64,0,1,fp8,fp8,0,0.18945066134134927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,128,1,float16,fp8,0,0.07331199944019318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,128,1,fp8,fp8,0,0.07073600093523662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,float16,0,0.21792533000310263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,0,1,float16,fp8,0,0.21787200371424356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,64,0,1,fp8,fp8,0,0.1914506753285726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,float16,0,0.07481599847475688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,128,1,float16,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,128,1,fp8,fp8,0,0.07266133526961009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,float16,0,0.21821866432825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,float16,0,0.060309335589408875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,0,1,fp8,fp8,0,0.19343467553456625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,64,0,1,float16,fp8,0,0.22126932938893637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,128,1,float16,fp8,0,0.05834666887919108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,float16,0,0.15493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,128,1,fp8,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,float16,0,0.058261334896087646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,0,1,float16,fp8,0,0.15503999590873718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,64,0,1,fp8,fp8,0,0.13820266723632812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,float16,0,0.15408000349998474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,128,1,float16,fp8,0,0.06002666552861532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,0,1,float16,fp8,0,0.15449066956837973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,128,1,fp8,fp8,0,0.05665066838264465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,64,0,1,fp8,fp8,0,0.1362559994061788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,float16,0,0.06032533446947733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,128,1,fp8,fp8,0,0.05805333455403646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,float16,0,0.15291200081507364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,128,1,float16,fp8,0,0.05862399935722351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,0,1,float16,fp8,0,0.15475199619928995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,float16,0,0.05835733314355215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,64,0,1,fp8,fp8,0,0.13768000404040018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,float16,0,0.15260799725850424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,128,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,128,1,fp8,fp8,0,0.059808000922203064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,0,1,float16,fp8,0,0.1546346644560496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,64,0,1,fp8,fp8,0,0.13637866576512656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,float16,0,0.30851733684539795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,128,1,float16,fp8,0,0.31090666850407916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,128,1,fp8,fp8,0,0.29258133967717487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,float16,0,0.7913973331451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,float16,0,0.31801066795984906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,0,1,float16,fp8,0,0.7965599695841471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,64,0,1,fp8,fp8,0,0.699402650197347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,128,1,float16,fp8,0,0.32100266218185425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,float16,0,0.801637331644694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,128,1,fp8,fp8,0,0.30435200532277423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,float16,0,0.32727466026941937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,0,1,float16,fp8,0,0.8066933155059814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,64,0,1,fp8,fp8,0,0.7103573481241862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,float16,0,0.8167359828948975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,128,1,float16,fp8,0,0.3312106728553772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,128,1,fp8,fp8,0,0.31640533606211346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,float16,0,0.1853653391202291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,0,1,float16,fp8,0,0.8182400067647299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,float16,0,0.44785066445668537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,128,1,float16,fp8,0,0.19167466958363852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,64,0,1,fp8,fp8,0,0.7248053550720215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,128,1,fp8,fp8,0,0.18318933248519897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,float16,0,0.16105600198109946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,0,1,float16,fp8,0,0.45339731375376385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,64,0,1,fp8,fp8,0,0.4022773504257202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,128,1,float16,fp8,0,0.16267733772595724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,float16,0,0.4159359931945801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,128,1,fp8,fp8,0,0.15877866744995117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,0,1,float16,fp8,0,0.4188693364461263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,float16,0,0.16475733121236166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,64,0,1,fp8,fp8,0,0.37453333536783856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,128,1,float16,fp8,0,0.16833599408467612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,float16,0,0.4224319855372111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,128,1,fp8,fp8,0,0.16377600034077963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,float16,0,0.17523733774820963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,0,1,float16,fp8,0,0.4252479871114095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,64,0,1,fp8,fp8,0,0.37836798032124835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,128,1,float16,fp8,0,0.17553067207336426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,128,1,fp8,fp8,0,0.16894932587941489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,float16,0,0.43162667751312256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,float16,0,0.10390933354695638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,0,1,fp8,fp8,0,0.38435200850168866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,64,0,1,float16,fp8,0,0.4340960184733073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,128,1,float16,fp8,0,0.10628267129262288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,float16,0,0.2497439980506897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,128,1,fp8,fp8,0,0.10525332887967427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,float16,0,0.09125866492589314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,0,1,float16,fp8,0,0.2524106701215108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,64,0,1,fp8,fp8,0,0.2266719937324524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,128,1,float16,fp8,0,0.09299199779828389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,float16,0,0.23356266816457114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,128,1,fp8,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,float16,0,0.09288533528645833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,0,1,float16,fp8,0,0.2343519926071167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,64,0,1,fp8,fp8,0,0.20589866240819296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,128,1,float16,fp8,0,0.09356799721717834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,128,1,fp8,fp8,0,0.08933867017428081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,float16,0,0.23456533749898276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,float16,0,0.0956213374932607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,0,1,float16,fp8,0,0.23679467042287192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,64,0,1,fp8,fp8,0,0.20791999499003092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,128,1,float16,fp8,0,0.0990773340066274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,128,1,fp8,fp8,0,0.0953439970811208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,float16,0,0.23986667394638062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,float16,0,0.062496001521746315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,0,1,fp8,fp8,0,0.2153493364651998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,64,0,1,float16,fp8,0,0.24074133237202963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,128,1,float16,fp8,0,0.06308266520500183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,128,1,fp8,fp8,0,0.06206400195757548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,float16,0,0.14860799908638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,float16,0,0.05808533231417338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,0,1,fp8,fp8,0,0.1339946687221527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,64,0,1,float16,fp8,0,0.1502240002155304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,128,1,float16,fp8,0,0.058186665177345276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,128,1,fp8,fp8,0,0.05580266813437144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,float16,0,0.14436266819636026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,0,1,float16,fp8,0,0.14472533265749613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,64,0,1,fp8,fp8,0,0.12803733348846436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,128,1,float16,fp8,0,0.060138667623202004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,128,1,fp8,fp8,0,0.05583466589450836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,float16,0,0.14531733592351279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,0,1,float16,fp8,0,0.14443733294804892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,64,0,1,fp8,fp8,0,0.12774933377901712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,float16,0,0.060218666990598045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,float16,0,0.14667200048764548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,128,1,float16,fp8,0,0.06005333364009857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,128,1,fp8,fp8,0,0.05789866546789805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,0,1,float16,fp8,0,0.14725333452224731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,64,0,1,fp8,fp8,0,0.13004266222318014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,float16,0,0.050144001841545105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,float16,0,0.10564800103505452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,128,1,float16,fp8,0,0.05166399975617727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,128,1,fp8,fp8,0,0.048138668139775596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,0,1,float16,fp8,0,0.10525866349538167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,64,0,1,fp8,fp8,0,0.09433600306510925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,float16,0,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,float16,0,0.10553066929181416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,128,1,float16,fp8,0,0.05177066723505656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,128,1,fp8,fp8,0,0.04776533444722494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,0,1,float16,fp8,0,0.10556800166765849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,64,0,1,fp8,fp8,0,0.09522666533788045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,float16,0,0.051589335004488625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,float16,0,0.10544000069300334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,128,1,float16,fp8,0,0.05004266897837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,128,1,fp8,fp8,0,0.04990399877230326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,0,1,float16,fp8,0,0.10540800293286641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,64,0,1,fp8,fp8,0,0.09445333480834961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,float16,0,0.05003199974695841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,128,1,float16,fp8,0,0.05050133168697357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,float16,0,0.10541333754857381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,128,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,0,1,float16,fp8,0,0.10714667042096455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,64,0,1,fp8,fp8,0,0.0950879951318105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,float16,0,0.4047679901123047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,128,1,float16,fp8,0,0.4091626803080241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,128,1,fp8,fp8,0,0.38280534744262695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,float16,0,0.8066933155059814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,float16,0,0.4177333513895671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,0,1,float16,fp8,0,0.813157320022583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,64,0,1,fp8,fp8,0,0.7182133197784424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,float16,0,0.8214026292165121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,128,1,float16,fp8,0,0.42129600048065186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,128,1,fp8,fp8,0,0.39770134290059406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,0,1,float16,fp8,0,0.8253386815388998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,float16,0,0.4315253496170044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,64,0,1,fp8,fp8,0,0.7312853336334229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,float16,0,0.8410773277282715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,128,1,float16,fp8,0,0.43775999546051025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,128,1,fp8,fp8,0,0.41310401757558185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,float16,0,0.2374026576677958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,0,1,float16,fp8,0,0.8425013224283854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,64,0,1,fp8,fp8,0,0.7525973320007324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,128,1,float16,fp8,0,0.24290132522583008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,float16,0,0.4536426862080892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,128,1,fp8,fp8,0,0.2339093287785848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,0,1,float16,fp8,0,0.4594879945119222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,64,0,1,fp8,fp8,0,0.4134613275527954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,float16,0,0.20907733837763467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,128,1,float16,fp8,0,0.21050133307774863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,128,1,fp8,fp8,0,0.20138667027155557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,float16,0,0.4175573190053304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,float16,0,0.2137226661046346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,0,1,float16,fp8,0,0.42045867443084717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,64,0,1,fp8,fp8,0,0.3765439987182617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,128,1,float16,fp8,0,0.21628799041112265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,128,1,fp8,fp8,0,0.2076639930407206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,float16,0,0.42343465487162274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,float16,0,0.22150399287541708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,0,1,float16,fp8,0,0.42614932854970294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,64,0,1,fp8,fp8,0,0.3832799990971883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,128,1,float16,fp8,0,0.22514132658640543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,128,1,fp8,fp8,0,0.2161440054575602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,float16,0,0.4339199860890706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,float16,0,0.12782933314641318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,0,1,fp8,fp8,0,0.39426132043202716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,64,0,1,float16,fp8,0,0.4391893148422241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,float16,0,0.2460213303565979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,128,1,fp8,fp8,0,0.127920001745224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,128,1,float16,fp8,0,0.13126933574676514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,float16,0,0.10823999842007954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,0,1,fp8,fp8,0,0.22519999742507935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,64,0,1,float16,fp8,0,0.24893333514531454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,128,1,float16,fp8,0,0.1093280017375946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,128,1,fp8,fp8,0,0.10547199845314026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,float16,0,0.22470400730768839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,float16,0,0.11008532842000325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,0,1,float16,fp8,0,0.2244053284327189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,64,0,1,fp8,fp8,0,0.20137067635854086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,128,1,fp8,fp8,0,0.11124799648920695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,float16,0,0.22750399510065714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,128,1,float16,fp8,0,0.11349333326021831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,0,1,float16,fp8,0,0.2286506692568461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,float16,0,0.11662399768829346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,64,0,1,fp8,fp8,0,0.2076266606648763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,128,1,float16,fp8,0,0.11924800276756287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,float16,0,0.23356799284617105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,128,1,fp8,fp8,0,0.11776533722877502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,0,1,float16,fp8,0,0.23627734184265137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,float16,0,0.07057600220044453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,64,0,1,fp8,fp8,0,0.21397866805394491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,float16,0,0.13894400000572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,128,1,float16,fp8,0,0.07319466769695282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,128,1,fp8,fp8,0,0.07448533177375793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,0,1,float16,fp8,0,0.14062933127085367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,64,0,1,fp8,fp8,0,0.13036800424257913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,float16,0,0.06450133522351582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,128,1,float16,fp8,0,0.06645866731802623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,float16,0,0.13326399525006613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,128,1,fp8,fp8,0,0.06225066880385081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,float16,0,0.06649066507816315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,0,1,float16,fp8,0,0.13434132933616638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,64,0,1,fp8,fp8,0,0.11813333630561829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,128,1,float16,fp8,0,0.06646400193373363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,float16,0,0.1341919998327891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,128,1,fp8,fp8,0,0.06440000236034393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,0,1,float16,fp8,0,0.13431466619173685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,float16,0,0.06993066767851512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,64,0,1,fp8,fp8,0,0.11961600184440613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,float16,0,0.13529599706331888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,128,1,float16,fp8,0,0.06875200072924297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,128,1,fp8,fp8,0,0.06804800033569336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,0,1,float16,fp8,0,0.13614400227864584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,float16,0,0.04427733520666758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,64,0,1,fp8,fp8,0,0.12180266777674358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,float16,0,0.09136000275611877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,128,1,fp8,fp8,0,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,128,1,float16,fp8,0,0.0469813346862793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,0,1,float16,fp8,0,0.09121599793434143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,float16,0,0.04373333354791006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,64,0,1,fp8,fp8,0,0.08288533488909404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,float16,0,0.08922132849693298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,128,1,float16,fp8,0,0.043824002146720886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,128,1,fp8,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,0,1,float16,fp8,0,0.08894933263460796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,64,0,1,fp8,fp8,0,0.07893333335717519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,float16,0,0.04419733087221781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,float16,0,0.08876267075538635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,128,1,float16,fp8,0,0.04374399781227112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,128,1,fp8,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,0,1,float16,fp8,0,0.0890933374563853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,64,0,1,fp8,fp8,0,0.07900266846021016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,float16,0,0.04391466577847799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,float16,0,0.08947199583053589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,128,1,float16,fp8,0,0.04369066655635834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,128,1,fp8,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,0,1,fp8,fp8,0,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,64,0,1,float16,fp8,0,0.09132799506187439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,float16,0,0.06633066634337108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,128,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,128,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,0,1,float16,fp8,0,0.06637866795063019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,64,0,1,fp8,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,float16,0,0.06622933348019917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,128,1,float16,fp8,0,0.03555200000603994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,128,1,fp8,fp8,0,0.036042665441830955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,0,1,float16,fp8,0,0.06656533479690552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,64,0,1,fp8,fp8,0,0.06108800073464712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,float16,0,0.06545599798361461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,128,1,float16,fp8,0,0.03742400060097376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,128,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,0,1,float16,fp8,0,0.06625066697597504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,float16,0,0.03724266588687897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,64,0,1,fp8,fp8,0,0.0606879989306132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,float16,0,0.06540800134340923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,128,1,float16,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,128,1,fp8,fp8,0,0.03581333408753077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,0,1,float16,fp8,0,0.06611200173695882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,64,0,1,fp8,fp8,0,0.06060799956321716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,float16,0,0.3121386567751567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,128,1,float16,fp8,0,0.3153439958890279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,float16,0,0.5276426474253336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,128,1,fp8,fp8,0,0.2958880066871643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,0,1,float16,fp8,0,0.5310133298238119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,64,0,1,fp8,fp8,0,0.4763253529866536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,float16,0,0.32472000519434613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,128,1,float16,fp8,0,0.32470399141311646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,128,1,fp8,fp8,0,0.30799466371536255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,float16,0,0.5418026844660441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,0,1,float16,fp8,0,0.5415146748224894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,64,0,1,fp8,fp8,0,0.4867146809895833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,float16,0,0.33691199620564777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,128,1,fp8,fp8,0,0.31963199377059937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,128,1,float16,fp8,0,0.33662935098012287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,float16,0,0.5541333357493082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,float16,0,0.18734933932622275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,0,1,fp8,fp8,0,0.50164266427358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,64,0,1,float16,fp8,0,0.5558453400929769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,float16,0,0.305567999680837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,128,1,float16,fp8,0,0.1893813411394755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,128,1,fp8,fp8,0,0.18346667289733887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,float16,0,0.15896000464757284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,0,1,float16,fp8,0,0.3079520066579183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,64,0,1,fp8,fp8,0,0.28143999973932904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,128,1,float16,fp8,0,0.16098666191101074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,128,1,fp8,fp8,0,0.1558133363723755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,float16,0,0.27430399258931476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,0,1,float16,fp8,0,0.27532267570495605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,float16,0,0.16408532857894897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,64,0,1,fp8,fp8,0,0.2529653310775757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,128,1,float16,fp8,0,0.16704533497492471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,float16,0,0.2795146703720093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,128,1,fp8,fp8,0,0.16058666507403055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,0,1,float16,fp8,0,0.28030399481455487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,64,0,1,fp8,fp8,0,0.2571413318316142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,float16,0,0.17446933190027872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,float16,0,0.28857600688934326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,128,1,float16,fp8,0,0.17540266116460165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,128,1,fp8,fp8,0,0.16934933265050253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,0,1,float16,fp8,0,0.29000532627105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,float16,0,0.0993333359559377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,64,0,1,fp8,fp8,0,0.2658560077349345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,float16,0,0.16658666729927063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,128,1,float16,fp8,0,0.10179733236630757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,128,1,fp8,fp8,0,0.10136533776919048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,0,1,float16,fp8,0,0.16764267285664877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,64,0,1,fp8,fp8,0,0.15612799922625223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,float16,0,0.08524266878763835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,128,1,float16,fp8,0,0.08716266353925069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,float16,0,0.15041599671045938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,128,1,fp8,fp8,0,0.08260266482830048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,0,1,float16,fp8,0,0.15176533659299216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,float16,0,0.08726933598518372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,64,0,1,fp8,fp8,0,0.1341386636098226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,float16,0,0.15178666512171426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,128,1,float16,fp8,0,0.08887466788291931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,128,1,fp8,fp8,0,0.08623466889063518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,0,1,float16,fp8,0,0.152837336063385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,64,0,1,fp8,fp8,0,0.13845866918563843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,float16,0,0.09135466814041138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,128,1,float16,fp8,0,0.09286399682362874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,float16,0,0.15524799625078836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,128,1,fp8,fp8,0,0.09226133426030476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,0,1,float16,fp8,0,0.15761599938074747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,float16,0,0.056261335810025535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,64,0,1,fp8,fp8,0,0.146314670642217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,128,1,float16,fp8,0,0.058373332023620605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,float16,0,0.09524800380071004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,128,1,fp8,fp8,0,0.057376002271970115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,0,1,float16,fp8,0,0.09732799728711446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,64,0,1,fp8,fp8,0,0.0886240005493164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,float16,0,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,128,1,float16,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,128,1,fp8,fp8,0,0.05167999863624573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,0,1,float16,fp8,0,0.09302933017412822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,64,0,1,fp8,fp8,0,0.08291733264923096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,float16,0,0.05188799897829691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,float16,0,0.0929813285668691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,128,1,float16,fp8,0,0.05388799806435903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,128,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,0,1,float16,fp8,0,0.09301333626111348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,float16,0,0.053898667295773826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,64,0,1,fp8,fp8,0,0.08285333216190338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,float16,0,0.09326400359471639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,128,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,128,1,fp8,fp8,0,0.05337599913279215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,0,1,float16,fp8,0,0.09486400087674458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,float16,0,0.04002666721741358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,64,0,1,fp8,fp8,0,0.08527466654777527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,float16,0,0.06560533245404561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,128,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,128,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,float16,0,0.039520000418027244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,0,1,float16,fp8,0,0.06654933094978333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,64,0,1,fp8,fp8,0,0.060080001751581825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,float16,0,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,128,1,float16,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,128,1,fp8,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,0,1,float16,fp8,0,0.062314664324124656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,float16,0,0.03945599993069967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,64,0,1,fp8,fp8,0,0.05650666852792104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,float16,0,0.06226666768391927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,128,1,float16,fp8,0,0.0391839991013209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,128,1,fp8,fp8,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,0,1,float16,fp8,0,0.062394668658574425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,64,0,1,fp8,fp8,0,0.05769599974155426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,float16,0,0.06432533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,128,1,float16,fp8,0,0.039605334401130676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,128,1,fp8,fp8,0,0.039877332746982574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,float16,0,0.031856000423431396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,0,1,float16,fp8,0,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,64,0,1,fp8,fp8,0,0.05839466551939646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,128,1,fp8,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,float16,0,0.05450133482615153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,0,1,float16,fp8,0,0.052015999952952065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,float16,0,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,64,0,1,fp8,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,float16,0,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,128,1,float16,fp8,0,0.032042667269706726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,128,1,fp8,fp8,0,0.03102933367093404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,0,1,float16,fp8,0,0.05282666782538096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,float16,0,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,64,0,1,fp8,fp8,0,0.047775998711586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,128,1,float16,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,float16,0,0.0521919975678126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,128,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,0,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,64,0,1,fp8,fp8,0,0.04822933177153269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,float16,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,128,1,fp8,fp8,0,0.031680000325044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,0,1,float16,fp8,0,0.051813334226608276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,64,0,1,fp8,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,float16,0,0.42586668332417804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,128,1,float16,fp8,0,0.4291626612345378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,float16,0,0.592965324719747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,128,1,fp8,fp8,0,0.3952853282292684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,0,1,float16,fp8,0,0.593392014503479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,64,0,1,fp8,fp8,0,0.5313760042190552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,float16,0,0.44547732671101886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,float16,0,0.6098560094833374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,128,1,float16,fp8,0,0.444543997446696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,128,1,fp8,fp8,0,0.40584532419840497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,0,1,float16,fp8,0,0.610922654469808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,64,0,1,fp8,fp8,0,0.5434720118840536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,float16,0,0.4540853500366211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,float16,0,0.6242186625798544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,128,1,float16,fp8,0,0.454309344291687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,128,1,fp8,fp8,0,0.4200479984283447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,0,1,float16,fp8,0,0.6244853337605795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,float16,0,0.24709866444269815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,64,0,1,fp8,fp8,0,0.5555839935938517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,float16,0,0.34012266000111896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,128,1,float16,fp8,0,0.24924800793329874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,128,1,fp8,fp8,0,0.2382240096728007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,0,1,float16,fp8,0,0.3391146659851074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,64,0,1,fp8,fp8,0,0.3143999973932902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,float16,0,0.2118826707204183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,float16,0,0.29850133260091144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,128,1,float16,fp8,0,0.21296000480651855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,128,1,fp8,fp8,0,0.20323199033737183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,0,1,float16,fp8,0,0.3009120027224223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,float16,0,0.21874666213989258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,64,0,1,fp8,fp8,0,0.27451733748118085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,float16,0,0.305461327234904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,128,1,float16,fp8,0,0.21922133366266885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,128,1,fp8,fp8,0,0.21050665775934854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,0,1,float16,fp8,0,0.30664000908533734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,64,0,1,fp8,fp8,0,0.28170667092005414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,float16,0,0.22779732942581177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,float16,0,0.31538132826487225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,128,1,float16,fp8,0,0.22965866327285767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,128,1,fp8,fp8,0,0.2181653380393982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,0,1,float16,fp8,0,0.31937066713968915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,float16,0,0.12993066509564719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,64,0,1,fp8,fp8,0,0.2904319961865743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,float16,0,0.1816640098889669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,128,1,float16,fp8,0,0.13206400473912558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,128,1,fp8,fp8,0,0.12909866372744241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,0,1,float16,fp8,0,0.18163732687632242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,64,0,1,fp8,fp8,0,0.17075733343760172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,float16,0,0.10737599929173787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,float16,0,0.15471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,128,1,float16,fp8,0,0.10979732871055603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,128,1,fp8,fp8,0,0.10694400469462077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,0,1,fp8,fp8,0,0.1432319978872935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,64,0,1,float16,fp8,0,0.15735466281572977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,float16,0,0.10929600397745769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,128,1,float16,fp8,0,0.11165866255760193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,128,1,fp8,fp8,0,0.11025066177050273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,float16,0,0.15875732898712158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,0,1,fp8,fp8,0,0.15033599734306335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,float16,0,0.11886933445930481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,64,0,1,float16,fp8,0,0.16107199589411417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,128,1,float16,fp8,0,0.11942933003107707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,128,1,fp8,fp8,0,0.11775466799736023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,float16,0,0.1650986671447754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,float16,0,0.07069333394368489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,0,1,float16,fp8,0,0.1670080025990804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,64,0,1,fp8,fp8,0,0.15870933731396994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,float16,0,0.09698667128880818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,128,1,float16,fp8,0,0.07193066676457723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,128,1,fp8,fp8,0,0.07330666482448578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,0,1,float16,fp8,0,0.09815466403961182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,float16,0,0.06253866851329803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,64,0,1,fp8,fp8,0,0.09512533744176228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,float16,0,0.09030399719874065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,128,1,float16,fp8,0,0.06427733103434245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,128,1,fp8,fp8,0,0.0603413333495458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,0,1,float16,fp8,0,0.09161067008972168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,float16,0,0.06459733347098033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,64,0,1,fp8,fp8,0,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,float16,0,0.09166399637858073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,128,1,float16,fp8,0,0.06657599906126659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,128,1,fp8,fp8,0,0.06229333579540253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,0,1,float16,fp8,0,0.09282132983207703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,float16,0,0.06668266654014587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,64,0,1,fp8,fp8,0,0.08329066634178162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,float16,0,0.09331732988357544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,128,1,float16,fp8,0,0.06862933437029521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,128,1,fp8,fp8,0,0.065461332599322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,0,1,float16,fp8,0,0.0946720043818156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,float16,0,0.04365866879622141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,64,0,1,fp8,fp8,0,0.08673066894213359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,float16,0,0.06234666705131531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,128,1,float16,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,128,1,fp8,fp8,0,0.04158399999141693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,0,1,float16,fp8,0,0.0626933326323827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,64,0,1,fp8,fp8,0,0.058864002426465355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,float16,0,0.039887999494870506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,float16,0,0.06041066845258077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,128,1,float16,fp8,0,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,128,1,fp8,fp8,0,0.03969600051641464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,0,1,float16,fp8,0,0.060640002290407814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,64,0,1,fp8,fp8,0,0.054154664278030396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,float16,0,0.06029333174228668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,128,1,float16,fp8,0,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,128,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,0,1,float16,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,64,0,1,fp8,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,float16,0,0.04005866746107737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,float16,0,0.060234665870666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,128,1,float16,fp8,0,0.042037333051363625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,128,1,fp8,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,0,1,float16,fp8,0,0.06157866617043813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,64,0,1,fp8,fp8,0,0.056314667065938316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,float16,0,0.04197333256403605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,128,1,float16,fp8,0,0.030970667799313862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,128,1,fp8,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,0,1,float16,fp8,0,0.04351999859015147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,64,0,1,fp8,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,float16,0,0.04171200096607208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,128,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,128,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,0,1,float16,fp8,0,0.04200533529122671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,64,0,1,fp8,fp8,0,0.03948266555865606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,float16,0,0.041573333243529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,128,1,float16,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,128,1,fp8,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,0,1,float16,fp8,0,0.04159466673930486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,64,0,1,fp8,fp8,0,0.037978666524092354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,128,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,128,1,fp8,fp8,0,0.02796799937884013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,0,1,float16,fp8,0,0.04213866591453552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,64,0,1,fp8,fp8,0,0.04032533367474874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,128,1,float16,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,float16,0,0.04065600037574768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,0,1,float16,fp8,0,0.040448000033696495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,64,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,float16,0,0.04009599983692169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,128,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,128,1,fp8,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,0,1,float16,fp8,0,0.04141333450873693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,64,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,float16,0,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,128,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,0,1,float16,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,64,0,1,fp8,fp8,0,0.03799466788768768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,float16,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,128,1,float16,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,128,1,fp8,fp8,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,0,1,float16,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,64,0,1,fp8,fp8,0,0.037621334195137024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,float16,0,0.3176906704902649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,float16,0,0.39738134543100995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,128,1,float16,fp8,0,0.3190453251202901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,128,1,fp8,fp8,0,0.2983253399531047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,0,1,float16,fp8,0,0.39770666758219403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,64,0,1,fp8,fp8,0,0.36512001355489093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,float16,0,0.32969067494074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,float16,0,0.41065601507822674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,128,1,float16,fp8,0,0.331712007522583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,128,1,fp8,fp8,0,0.3094506661097209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,0,1,float16,fp8,0,0.4122186501820882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,64,0,1,fp8,fp8,0,0.3761119842529297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,float16,0,0.34142935276031494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,128,1,float16,fp8,0,0.34567467371622723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,float16,0,0.4242560068766276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,128,1,fp8,fp8,0,0.3211626609166463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,float16,0,0.1885226567586263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,0,1,float16,fp8,0,0.4280053377151489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,64,0,1,fp8,fp8,0,0.38843198617299396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,float16,0,0.23499733209609985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,128,1,float16,fp8,0,0.19132266441980997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,128,1,fp8,fp8,0,0.18454400698343912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,0,1,float16,fp8,0,0.23758399486541748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,float16,0,0.1585653324921926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,64,0,1,fp8,fp8,0,0.22198933362960815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,float16,0,0.19994133710861206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,128,1,fp8,fp8,0,0.15557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,128,1,float16,fp8,0,0.15915733575820923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,0,1,float16,fp8,0,0.20213866233825684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,64,0,1,fp8,fp8,0,0.18964266777038574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,float16,0,0.1644533375898997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,float16,0,0.21061333020528158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,128,1,fp8,fp8,0,0.16059733430544534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,128,1,float16,fp8,0,0.16566399733225504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,0,1,float16,fp8,0,0.2102186679840088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,64,0,1,fp8,fp8,0,0.1985386610031128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,float16,0,0.17268266280492148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,float16,0,0.2176426649093628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,128,1,float16,fp8,0,0.1764693260192871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,128,1,fp8,fp8,0,0.17004267374674478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,float16,0,0.09880533814430237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,0,1,float16,fp8,0,0.22107199827829996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,64,0,1,fp8,fp8,0,0.2059733271598816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,128,1,float16,fp8,0,0.10180800159772237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,float16,0,0.12507200241088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,0,1,float16,fp8,0,0.12717866897583008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,128,1,fp8,fp8,0,0.1018399993578593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,64,0,1,fp8,fp8,0,0.12176000078519185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,float16,0,0.08481066425641377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,float16,0,0.1090826690196991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,128,1,float16,fp8,0,0.08717866738637288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,128,1,fp8,fp8,0,0.08101333181063335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,0,1,float16,fp8,0,0.11034666498502095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,64,0,1,fp8,fp8,0,0.099263995885849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,float16,0,0.08749333024024963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,float16,0,0.1113973359266917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,128,1,float16,fp8,0,0.08900800347328186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,128,1,fp8,fp8,0,0.08443199594815572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,0,1,float16,fp8,0,0.11400000254313152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,64,0,1,fp8,fp8,0,0.10529067118962605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,float16,0,0.09099733829498291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,float16,0,0.11396267016728719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,128,1,float16,fp8,0,0.09380799531936646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,128,1,fp8,fp8,0,0.09249599774678548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,0,1,fp8,fp8,0,0.11169067025184631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,64,0,1,float16,fp8,0,0.11782399813334148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,float16,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,float16,0,0.07134933272997539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,128,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,128,1,fp8,fp8,0,0.0566293348868688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,0,1,float16,fp8,0,0.07312533259391785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,64,0,1,fp8,fp8,0,0.06880533198515575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,float16,0,0.05000533163547516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,float16,0,0.06664533416430156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,128,1,float16,fp8,0,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,128,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,0,1,float16,fp8,0,0.06664533416430156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,64,0,1,fp8,fp8,0,0.062309334675470986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,float16,0,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,float16,0,0.06625066697597504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,128,1,float16,fp8,0,0.05228800078233083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,128,1,fp8,fp8,0,0.05237866441408793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,0,1,float16,fp8,0,0.06816000243028005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,64,0,1,fp8,fp8,0,0.06251733501752217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,float16,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,float16,0,0.06884266436100006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,128,1,float16,fp8,0,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,128,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,0,1,float16,fp8,0,0.07052266597747803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,64,0,1,fp8,fp8,0,0.06489066779613495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,float16,0,0.037978666524092354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,128,1,float16,fp8,0,0.03912533322970072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,128,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,0,1,float16,fp8,0,0.04836800197760264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,64,0,1,fp8,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,float16,0,0.03587199995915095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,float16,0,0.04576533536116282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,128,1,float16,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,0,1,float16,fp8,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,128,1,fp8,fp8,0,0.035904000202814736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,64,0,1,fp8,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,float16,0,0.037530665596326195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,float16,0,0.046207999189694725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,128,1,float16,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,0,1,float16,fp8,0,0.046810666720072426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,128,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,64,0,1,fp8,fp8,0,0.04364799956480662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,float16,0,0.03655466685692469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,float16,0,0.04701866706212362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,128,1,float16,fp8,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,128,1,fp8,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,0,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,64,0,1,fp8,fp8,0,0.04357333481311798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,float16,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,0,1,float16,fp8,0,0.036042665441830955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,64,0,1,fp8,fp8,0,0.03383466601371765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,float16,0,0.03563733398914337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,128,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,128,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,0,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,64,0,1,fp8,fp8,0,0.033471999069054924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,float16,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,float16,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,128,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,128,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,0,1,float16,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,64,0,1,fp8,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,float16,0,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,128,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,0,1,float16,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,64,0,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,float16,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,float16,0,0.033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,128,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,0,1,float16,fp8,0,0.03377600014209747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,64,0,1,fp8,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,float16,0,0.033930666744709015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,128,1,float16,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,0,1,float16,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,64,0,1,fp8,fp8,0,0.03140799949566523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,float16,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,128,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,128,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,0,1,float16,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,64,0,1,fp8,fp8,0,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,128,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,float16,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,128,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,64,0,1,fp8,fp8,0,0.03215466688076655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,float16,0,0.3634879986445109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,128,1,float16,fp8,0,0.35944533348083496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,128,1,fp8,fp8,0,0.3389333486557007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,float16,0,0.41305065155029297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,0,1,float16,fp8,0,0.4113226731618245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,64,0,1,fp8,fp8,0,0.37588266531626385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,float16,0,0.3656746546427409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,float16,0,0.415829340616862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,128,1,float16,fp8,0,0.3652373154958089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,128,1,fp8,fp8,0,0.34333332379659015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,0,1,float16,fp8,0,0.4152959982554118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,64,0,1,fp8,fp8,0,0.3832053343454997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,float16,0,0.37385066350301105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,float16,0,0.42643733819325763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,128,1,float16,fp8,0,0.37564265727996826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,128,1,fp8,fp8,0,0.3591839869817098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,float16,0,0.1978293259938558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,0,1,float16,fp8,0,0.42791465918223065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,64,0,1,fp8,fp8,0,0.40853333473205566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,float16,0,0.22831465800603232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,128,1,float16,fp8,0,0.1957226594289144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,128,1,fp8,fp8,0,0.20137600104014078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,0,1,float16,fp8,0,0.22616000970204672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,64,0,1,fp8,fp8,0,0.22427733739217123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,float16,0,0.1904159982999166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,float16,0,0.21653334299723306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,128,1,float16,fp8,0,0.189626673857371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,128,1,fp8,fp8,0,0.1776906649271647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,0,1,float16,fp8,0,0.21533334255218506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,64,0,1,fp8,fp8,0,0.1992266575495402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,float16,0,0.1916373372077942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,128,1,float16,fp8,0,0.19163199265797934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,float16,0,0.21876800060272217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,128,1,fp8,fp8,0,0.18126932779947916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,0,1,fp8,fp8,0,0.2018293341000875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,64,0,1,float16,fp8,0,0.21995733181635538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,float16,0,0.19565333922704062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,128,1,float16,fp8,0,0.19779199361801147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,float16,0,0.22635199626286825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,128,1,fp8,fp8,0,0.19005332390467325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,float16,0,0.10988266269365947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,0,1,fp8,fp8,0,0.21072532733281454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,64,0,1,float16,fp8,0,0.22607467571894327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,float16,0,0.12774933377901712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,128,1,float16,fp8,0,0.10776533683141072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,128,1,fp8,fp8,0,0.11184533437093098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,0,1,float16,fp8,0,0.12533332904179892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,64,0,1,fp8,fp8,0,0.12291199962298076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,float16,0,0.1034986674785614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,float16,0,0.11716266473134358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,128,1,float16,fp8,0,0.10123200217882793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,128,1,fp8,fp8,0,0.09727999567985535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,0,1,float16,fp8,0,0.11680533488591512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,64,0,1,fp8,fp8,0,0.10943466424942017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,float16,0,0.10475200414657593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,float16,0,0.12008532881736755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,128,1,float16,fp8,0,0.1048426628112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,128,1,fp8,fp8,0,0.09973866740862529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,0,1,fp8,fp8,0,0.11296000083287557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,64,0,1,float16,fp8,0,0.119759996732076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,float16,0,0.1074079970518748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,float16,0,0.12388267119725545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,128,1,float16,fp8,0,0.10774933298428853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,128,1,fp8,fp8,0,0.10478933652242024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,0,1,float16,fp8,0,0.12361600001653035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,float16,0,0.060191998879114784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,64,0,1,fp8,fp8,0,0.11606933673222859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,float16,0,0.06867200136184692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,128,1,fp8,fp8,0,0.06458666423956554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,0,1,float16,fp8,0,0.06891733407974243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,128,1,float16,fp8,0,0.05995733539263407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,64,0,1,fp8,fp8,0,0.07017600039641063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,float16,0,0.05971199770768484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,float16,0,0.06817600131034851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,128,1,float16,fp8,0,0.05872533222039541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,128,1,fp8,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,0,1,float16,fp8,0,0.06725333134333293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,64,0,1,fp8,fp8,0,0.06325866778691609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,float16,0,0.06046399970849355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,float16,0,0.06905066470305125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,128,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,128,1,fp8,fp8,0,0.058149332801500954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,0,1,fp8,fp8,0,0.06435200075308482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,64,0,1,float16,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,float16,0,0.06087466577688853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,float16,0,0.0701333334048589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,128,1,float16,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,128,1,fp8,fp8,0,0.059077332417170204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,0,1,float16,fp8,0,0.07018133501211803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,64,0,1,fp8,fp8,0,0.06711466610431671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,float16,0,0.037434667348861694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,float16,0,0.046154667933781944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,128,1,float16,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,128,1,fp8,fp8,0,0.0383093332250913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,0,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,64,0,1,fp8,fp8,0,0.043807998299598694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,float16,0,0.0440533310174942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,128,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,128,1,fp8,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,0,1,float16,fp8,0,0.044079999128977455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,64,0,1,fp8,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,float16,0,0.04620266457398733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,128,1,float16,fp8,0,0.03790933390458425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,128,1,fp8,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,0,1,float16,fp8,0,0.04426133135954539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,64,0,1,fp8,fp8,0,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,float16,0,0.0461760014295578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,128,1,float16,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,128,1,fp8,fp8,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,0,1,float16,fp8,0,0.04608533283074697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,64,0,1,fp8,fp8,0,0.04398400088151296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,float16,0,0.02479466547568639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,float16,0,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,128,1,float16,fp8,0,0.025642665723959606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,64,0,1,fp8,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,128,1,float16,fp8,0,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,0,1,float16,fp8,0,0.030005333324273426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,64,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,float16,0,0.030837332208951313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,128,1,float16,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,128,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,0,1,float16,fp8,0,0.030645333230495453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,64,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,float16,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,128,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,0,1,float16,fp8,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,64,0,1,fp8,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,float16,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,128,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,64,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,float16,0,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,128,1,float16,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,0,1,float16,fp8,0,0.026704000929991405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,64,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,float16,0,0.025909334421157837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,128,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,0,1,float16,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,64,0,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,float16,0,0.02292799949645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,128,1,float16,fp8,0,0.021594665944576263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,128,1,fp8,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,64,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,float16,0,0.02088533341884613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,float16,0,0.024826665719350178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,128,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,128,1,fp8,fp8,0,0.020741333564122517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,0,1,fp8,fp8,0,0.02293333411216736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,64,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,64,0,1,fp8,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,float16,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,float16,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,64,0,1,float16,fp8,0,0.025653332471847534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,128,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,128,1,fp8,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,64,0,1,fp8,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,128,1,float16,float16,0,0.3534080187479655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,0,1,float16,float16,0,0.35315199693044025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,128,1,float16,fp8,0,0.3500800132751465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,128,1,fp8,fp8,0,0.3311786651611328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,0,1,float16,fp8,0,0.350874662399292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,64,0,1,fp8,fp8,0,0.32873600721359253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,0,1,float16,float16,0,0.3575253486633301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,128,1,float16,float16,0,0.3565066655476888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,128,1,float16,fp8,0,0.35709865887959796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,128,1,fp8,fp8,0,0.3383359909057617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,0,1,float16,fp8,0,0.35704533259073895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,64,0,1,fp8,fp8,0,0.33315734068552655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,128,1,float16,float16,0,0.3662666479746501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,0,1,float16,float16,0,0.3705813487370809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,128,1,float16,fp8,0,0.3667200009028117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,128,1,fp8,fp8,0,0.3532373507817586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,0,1,float16,fp8,0,0.3685813347498576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,128,1,float16,float16,0,0.19134400288263956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,0,1,float16,float16,0,0.19518399238586426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,128,1,float16,fp8,0,0.18992533286412558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,64,0,1,fp8,fp8,0,0.3524746497472127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,128,1,fp8,fp8,0,0.19572800397872925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,0,1,float16,fp8,0,0.1925706664721171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,64,0,1,fp8,fp8,0,0.19576533635457358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,128,1,float16,float16,0,0.18604799111684164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,0,1,float16,float16,0,0.18542933464050293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,128,1,float16,fp8,0,0.18283732732137045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,128,1,fp8,fp8,0,0.1750239928563436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,0,1,float16,fp8,0,0.1846346656481425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,64,0,1,fp8,fp8,0,0.17244799931844076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,128,1,float16,float16,0,0.18759999672571817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,0,1,float16,float16,0,0.18953067064285278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,128,1,float16,fp8,0,0.18574400742848715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,128,1,fp8,fp8,0,0.177130659421285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,0,1,float16,fp8,0,0.18754667043685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,64,0,1,fp8,fp8,0,0.17518399159113565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,128,1,float16,float16,0,0.19150400161743164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,0,1,float16,float16,0,0.19332265853881836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,128,1,float16,fp8,0,0.19113065799077353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,128,1,fp8,fp8,0,0.18568533658981323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,0,1,float16,fp8,0,0.19236799081166586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,64,0,1,fp8,fp8,0,0.18374399344126383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,128,1,float16,float16,0,0.10598933696746826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,0,1,float16,float16,0,0.10904533664385478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,128,1,float16,fp8,0,0.10518399874369304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,128,1,fp8,fp8,0,0.10973333319028218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,0,1,float16,fp8,0,0.10735467076301575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,64,0,1,fp8,fp8,0,0.10950932900110881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,128,1,float16,float16,0,0.1011840005715688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,0,1,float16,float16,0,0.10098666946093242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,128,1,float16,fp8,0,0.10032000144322713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,128,1,fp8,fp8,0,0.09709866841634114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,0,1,float16,fp8,0,0.10012267033259074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,64,0,1,fp8,fp8,0,0.09522666533788045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,128,1,float16,float16,0,0.10333866874376933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,0,1,float16,float16,0,0.1032373309135437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,128,1,float16,fp8,0,0.10158933202425639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,128,1,fp8,fp8,0,0.10034666458765666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,0,1,float16,fp8,0,0.10327999790509541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,64,0,1,fp8,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,128,1,float16,float16,0,0.10566400488217671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,0,1,float16,float16,0,0.10737599929173787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,128,1,float16,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,128,1,fp8,fp8,0,0.10326932867368062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,0,1,float16,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,64,0,1,fp8,fp8,0,0.10240532954533894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,128,1,float16,float16,0,0.06020799775918325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,0,1,float16,float16,0,0.058330665032068886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,128,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,128,1,fp8,fp8,0,0.06413333117961884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,0,1,float16,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,64,0,1,fp8,fp8,0,0.06142933170000712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,128,1,float16,float16,0,0.05830933153629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,0,1,float16,float16,0,0.05625600119431814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,128,1,float16,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,128,1,fp8,fp8,0,0.05453333258628845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,0,1,float16,fp8,0,0.05667733152707418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,64,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,128,1,float16,float16,0,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,0,1,float16,float16,0,0.058415999015172325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,128,1,float16,fp8,0,0.05804799993832906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,128,1,fp8,fp8,0,0.05561600128809611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,0,1,float16,fp8,0,0.058261334896087646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,64,0,1,fp8,fp8,0,0.05504000186920166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,128,1,float16,float16,0,0.06031466523806254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,0,1,float16,float16,0,0.05993066728115082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,128,1,float16,fp8,0,0.05979733169078827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,128,1,fp8,fp8,0,0.05997333427270254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,0,1,float16,fp8,0,0.06010133524735769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,64,0,1,fp8,fp8,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,128,1,float16,float16,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,0,1,float16,float16,0,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,128,1,float16,fp8,0,0.037471999724706016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,0,1,float16,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,0,1,fp8,fp8,0,0.03806933263937632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,64,128,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,128,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,128,1,float16,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,0,1,float16,float16,0,0.037871999045213066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,0,1,float16,fp8,0,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,128,1,fp8,fp8,0,0.0365280012289683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,64,0,1,fp8,fp8,0,0.03746666759252548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,128,1,float16,float16,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,128,1,float16,fp8,0,0.03862933317820231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,128,1,fp8,fp8,0,0.03735466549793879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,0,1,float16,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,64,0,1,fp8,fp8,0,0.03645866612593333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,0,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,128,1,float16,fp8,0,0.03822399924198786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,128,1,float16,float16,0,0.040048000713189445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,128,1,fp8,fp8,0,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,0,1,float16,fp8,0,0.03958400090535482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,64,0,1,fp8,fp8,0,0.039290666580200195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,128,1,float16,float16,0,0.025983999172846477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,0,1,float16,float16,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,128,1,float16,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,128,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,0,1,fp8,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,128,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,64,0,1,float16,fp8,0,0.02792533238728841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,128,1,fp8,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,64,0,1,float16,fp8,0,0.026506667335828144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,128,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,128,1,float16,fp8,0,0.025829332570234936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,128,1,fp8,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,64,0,1,fp8,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,128,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,0,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,128,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,128,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,64,0,1,fp8,fp8,0,0.026858667532602947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,0,1,float16,float16,0,0.02181866765022278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,128,1,float16,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,128,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,64,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,128,1,float16,fp8,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,128,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,64,0,1,fp8,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,128,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,128,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,64,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,0,1,float16,float16,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,128,1,float16,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,128,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,0,1,float16,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,64,0,1,fp8,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,128,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,0,1,float16,float16,0,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,128,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,128,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,0,1,float16,fp8,0,0.021509334444999695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,128,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,128,1,float16,fp8,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,128,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,128,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,128,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,0,1,float16,float16,0,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,128,1,fp8,fp8,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,64,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,128,1,float16,fp8,0,0.02081599955757459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,0,1,float16,fp8,0,0.019861333072185516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,128,1,fp8,fp8,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,64,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,0,1,float16,float16,0,0.019760000209013622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,128,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,64,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,0,1,float16,float16,0,0.019472000499566395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,0,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,64,0,1,fp8,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,128,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,0,1,float16,float16,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,128,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,128,1,float16,float16,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,128,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,128,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,128,1,float16,float16,0,0.17303466796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,0,1,float16,float16,0,0.16906134287516275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,128,1,float16,fp8,0,0.1731520096460978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,128,1,fp8,fp8,0,0.16222400466601053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,0,1,float16,fp8,0,0.1679626703262329
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,64,0,1,fp8,fp8,0,0.15852266550064087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,128,1,float16,float16,0,0.17404266198476157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,0,1,float16,float16,0,0.17126399278640747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,128,1,float16,fp8,0,0.1735360026359558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,0,1,float16,fp8,0,0.1690666675567627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,128,1,fp8,fp8,0,0.16422933340072632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,64,0,1,fp8,fp8,0,0.15858667095502219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,128,1,float16,float16,0,0.18107734123865762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,0,1,float16,float16,0,0.17709332704544067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,128,1,fp8,fp8,0,0.1738026738166809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,128,1,float16,fp8,0,0.17880000670750937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,0,1,float16,fp8,0,0.1763146718343099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,128,1,float16,float16,0,0.09950400392214458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,64,0,1,fp8,fp8,0,0.16991466283798218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,128,1,float16,fp8,0,0.09858666857083638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,0,1,float16,float16,0,0.09890133142471313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,128,1,fp8,fp8,0,0.10122666756312053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,0,1,float16,fp8,0,0.09708799918492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,64,0,1,fp8,fp8,0,0.09758933385213216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,128,1,float16,float16,0,0.09297066926956177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,0,1,float16,float16,0,0.09021866321563721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,128,1,float16,fp8,0,0.09120532870292664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,128,1,fp8,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,0,1,float16,fp8,0,0.08912533521652222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,64,0,1,fp8,fp8,0,0.08661866188049316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,128,1,float16,float16,0,0.09315199653307597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,0,1,float16,float16,0,0.09310400485992432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,128,1,float16,fp8,0,0.09335466225941975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,128,1,fp8,fp8,0,0.09102400143941243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,0,1,float16,fp8,0,0.09230933586756389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,64,0,1,fp8,fp8,0,0.08692800005276997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,128,1,float16,float16,0,0.09826667110125224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,0,1,float16,float16,0,0.0972213347752889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,128,1,float16,fp8,0,0.09718933701515198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,128,1,fp8,fp8,0,0.09583466251691182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,0,1,float16,fp8,0,0.09505599737167358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,64,0,1,fp8,fp8,0,0.09444800019264221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,128,1,float16,float16,0,0.0540533314148585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,0,1,float16,float16,0,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,128,1,float16,fp8,0,0.05622399846712748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,128,1,fp8,fp8,0,0.057301332553227745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,0,1,float16,fp8,0,0.05354666709899902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,64,0,1,fp8,fp8,0,0.0576800008614858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,128,1,float16,float16,0,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,0,1,float16,float16,0,0.05260799825191498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,128,1,float16,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,128,1,fp8,fp8,0,0.05242133140563965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,0,1,float16,fp8,0,0.052255998055140175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,64,0,1,fp8,fp8,0,0.04967466493447622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,128,1,float16,float16,0,0.05414933462937673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,0,1,float16,float16,0,0.054144000013669334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,128,1,float16,fp8,0,0.05393599967161814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,128,1,fp8,fp8,0,0.05194133520126343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,0,1,float16,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,64,0,1,fp8,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,128,1,float16,float16,0,0.05611200133959452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,0,1,float16,float16,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,128,1,float16,fp8,0,0.054341331124305725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,128,1,fp8,fp8,0,0.05603733162085215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,0,1,float16,fp8,0,0.0539680023988088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,64,0,1,fp8,fp8,0,0.05433600147565206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,128,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,0,1,float16,float16,0,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,128,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,128,1,fp8,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,0,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,64,0,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,128,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,128,1,float16,fp8,0,0.03590933233499527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,128,1,fp8,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,64,0,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,128,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,128,1,float16,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,0,1,float16,float16,0,0.03455466777086258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,128,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,0,1,float16,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,64,0,1,fp8,fp8,0,0.03326933334271113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,128,1,float16,float16,0,0.0367999995748202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,0,1,float16,float16,0,0.03595733394225439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,128,1,float16,fp8,0,0.035962666074434914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,128,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,0,1,float16,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,64,0,1,fp8,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,128,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,0,1,float16,float16,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,0,1,float16,fp8,0,0.0242399995525678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,128,1,float16,fp8,0,0.024346667031447094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,64,0,1,fp8,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,128,1,float16,float16,0,0.02458133300145467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,128,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,128,1,float16,fp8,0,0.025733334322770435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,64,0,1,fp8,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,128,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,128,1,fp8,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,0,1,float16,float16,0,0.024304000039895374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,64,0,1,fp8,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,128,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,0,1,float16,float16,0,0.023989332218964893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,128,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,128,1,fp8,fp8,0,0.02390933285156886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,0,1,float16,fp8,0,0.023760000864664715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,128,1,float16,float16,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,64,0,1,fp8,fp8,0,0.024160000185171764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,0,1,float16,float16,0,0.020197333147128422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,128,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,128,1,fp8,fp8,0,0.019786667078733444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,0,1,float16,fp8,0,0.02015999952952067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,64,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,128,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,128,1,float16,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,0,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,128,1,float16,float16,0,0.02077866718173027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,128,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,64,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,128,1,float16,float16,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,128,1,fp8,fp8,0,0.020138667275508244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,64,0,1,fp8,fp8,0,0.019978666057189304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,128,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,0,1,float16,float16,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,128,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,64,128,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,0,1,float16,float16,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,128,1,float16,fp8,0,0.018346666047970455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,128,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,64,0,1,fp8,fp8,0,0.018042666216691334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,128,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,128,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,64,0,1,fp8,fp8,0,0.018250666558742523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,128,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,128,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,64,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,128,1,float16,float16,0,0.017727999637524288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,128,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,0,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,64,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,128,1,float16,float16,0,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,128,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,128,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,0,1,float16,fp8,0,0.018458666900793713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,128,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,0,1,float16,float16,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,128,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,128,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,0,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,128,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,128,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,64,0,1,fp8,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,128,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,128,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,128,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,64,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,0,1,float16,float16,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,0,1,float16,fp8,0,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,128,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,128,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,128,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,128,1,float16,float16,0,0.09758399923642476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,0,1,float16,float16,0,0.09697066744168599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,128,1,float16,fp8,0,0.09523733456929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,128,1,fp8,fp8,0,0.09339732925097148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,0,1,float16,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,64,0,1,fp8,fp8,0,0.09314666191736858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,128,1,float16,float16,0,0.09804800152778625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,0,1,float16,float16,0,0.09738133351008098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,128,1,float16,fp8,0,0.09655466675758362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,128,1,fp8,fp8,0,0.09291199843088786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,0,1,float16,fp8,0,0.09703999757766724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,64,0,1,fp8,fp8,0,0.09151466687520345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,128,1,float16,float16,0,0.10115733742713928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,0,1,float16,float16,0,0.10123733679453532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,128,1,float16,fp8,0,0.09924266735712688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,128,1,fp8,fp8,0,0.09742933511734009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,0,1,float16,fp8,0,0.09984532992045085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,128,1,float16,float16,0,0.05423999826113383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,64,0,1,fp8,fp8,0,0.09915733337402344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,128,1,float16,fp8,0,0.05515733361244202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,128,1,fp8,fp8,0,0.05856533348560333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,0,1,float16,fp8,0,0.054842665791511536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,64,0,1,fp8,fp8,0,0.05798399945100149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,128,1,float16,float16,0,0.05436799923578898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,0,1,float16,float16,0,0.05383466680844625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,128,1,float16,fp8,0,0.05403199791908264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,128,1,fp8,fp8,0,0.051957334081331887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,0,1,float16,fp8,0,0.054085334142049156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,64,0,1,fp8,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,128,1,float16,float16,0,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,0,1,float16,float16,0,0.05409066875775655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,128,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,128,1,fp8,fp8,0,0.05064533154169718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,0,1,float16,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,64,0,1,fp8,fp8,0,0.0510453333457311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,128,1,float16,float16,0,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,0,1,float16,float16,0,0.05632533133029938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,128,1,float16,fp8,0,0.05400000015894572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,128,1,fp8,fp8,0,0.054431999723116554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,64,0,1,fp8,fp8,0,0.05460800230503082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,128,1,float16,float16,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,128,1,float16,fp8,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,128,1,fp8,fp8,0,0.03770133356253306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,64,0,1,fp8,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,128,1,float16,float16,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,0,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,128,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,128,1,fp8,fp8,0,0.03347733368476232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,0,1,float16,fp8,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,64,0,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,128,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,0,1,float16,float16,0,0.036330667634805046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,128,1,float16,fp8,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,128,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,0,1,float16,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,64,0,1,fp8,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,0,1,float16,float16,0,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,128,1,float16,fp8,0,0.037104000647862755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,128,1,fp8,fp8,0,0.036602665980656944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,0,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,64,0,1,fp8,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,128,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,128,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,0,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,128,1,fp8,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,64,0,1,fp8,fp8,0,0.023775999744733173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,128,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,128,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,64,0,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,0,1,float16,float16,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,128,1,float16,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,128,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,128,1,float16,float16,0,0.02458133300145467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,0,1,float16,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,64,0,1,fp8,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,128,1,float16,float16,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,0,1,float16,float16,0,0.024256000916163128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,128,1,float16,fp8,0,0.0240639994541804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,128,1,fp8,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,128,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,64,0,1,fp8,fp8,0,0.024405332903067272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,64,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,128,1,float16,float16,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,128,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,128,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,64,0,1,float16,fp8,0,0.01972266659140587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,128,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,128,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,128,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,64,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,128,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,0,1,float16,float16,0,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,128,1,fp8,fp8,0,0.018496000518401463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,64,0,1,fp8,fp8,0,0.017925333231687546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,128,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,128,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,64,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,128,1,float16,float16,0,0.015850666910409927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,0,1,float16,float16,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,128,1,float16,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,128,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,128,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,64,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,128,1,float16,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,128,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,128,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,128,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,128,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,128,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,128,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,128,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,128,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,128,1,float16,float16,0,0.01640533283352852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,128,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,128,1,fp8,fp8,0,0.015824000040690105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,64,0,1,fp8,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,128,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,128,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,64,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,128,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,128,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,128,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,128,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,128,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,128,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,64,0,1,fp8,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,128,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,0,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,128,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,128,1,float16,float16,0,0.06864533325036366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,0,1,float16,float16,0,0.06877866884072621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,128,1,fp8,fp8,0,0.0660693347454071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,128,1,float16,fp8,0,0.06874666611353557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,0,1,float16,fp8,0,0.06851199766000111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,64,0,1,fp8,fp8,0,0.06567466755708058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,128,1,float16,float16,0,0.06853866577148438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,0,1,float16,float16,0,0.06895466645558675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,128,1,float16,fp8,0,0.07019199927647908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,128,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,0,1,float16,fp8,0,0.06889066596825917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,64,0,1,fp8,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,128,1,float16,float16,0,0.07063999772071838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,0,1,float16,float16,0,0.07011199990908305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,128,1,fp8,fp8,0,0.06770133475462596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,128,1,float16,fp8,0,0.07032533486684163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,0,1,float16,fp8,0,0.07077333331108093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,128,1,float16,float16,0,0.04205866654713949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,64,0,1,fp8,fp8,0,0.06860266625881195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,128,1,float16,fp8,0,0.044031997521718345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,128,1,fp8,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,0,1,float16,fp8,0,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,64,0,1,fp8,fp8,0,0.042026668787002563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,128,1,float16,float16,0,0.042175998290379844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,0,1,float16,float16,0,0.04200533529122671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,128,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,128,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,0,1,fp8,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,64,0,1,float16,fp8,0,0.041706666350364685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,128,1,float16,float16,0,0.04205866654713949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,0,1,float16,float16,0,0.04200533529122671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,128,1,float16,fp8,0,0.04182933270931244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,128,1,fp8,fp8,0,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,0,1,float16,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,64,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,128,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,128,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,0,1,float16,float16,0,0.04362666606903076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,128,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,0,1,float16,fp8,0,0.042064001162846885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,64,0,1,fp8,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,0,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,128,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,128,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,64,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,128,1,float16,float16,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,0,1,float16,float16,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,128,1,float16,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,128,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,64,0,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,128,1,float16,float16,0,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,0,1,float16,float16,0,0.028037334481875103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,128,1,float16,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,128,1,fp8,fp8,0,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,0,1,float16,fp8,0,0.027957332630952198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,0,1,float16,float16,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,128,1,float16,float16,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,64,0,1,fp8,fp8,0,0.02807466685771942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,128,1,float16,fp8,0,0.028143999477227528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,128,1,fp8,fp8,0,0.03029866764942805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,0,1,float16,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,64,0,1,fp8,fp8,0,0.029525332152843475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,128,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,64,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,128,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,128,1,float16,fp8,0,0.019871999820073444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,128,1,float16,float16,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,128,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,0,1,float16,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,64,0,1,fp8,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,128,1,float16,float16,0,0.019760000209013622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,128,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,0,1,float16,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,64,0,1,fp8,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,0,1,float16,float16,0,0.01600533351302147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,128,1,float16,fp8,0,0.01759999990463257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,128,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,64,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,0,1,float16,float16,0,0.01759999990463257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,128,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,128,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,0,1,float16,fp8,0,0.01658133293191592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,64,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,0,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,128,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,64,0,1,fp8,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,128,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,128,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,128,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,64,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,128,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,0,1,float16,float16,0,0.01626666635274887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,64,0,1,fp8,fp8,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,128,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,128,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,0,1,float16,float16,0,0.016063999384641647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,128,1,fp8,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,64,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,128,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,128,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,128,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,64,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,128,1,float16,fp8,0,0.01594666639963786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,128,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,128,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,128,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,128,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,128,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,128,1,fp8,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,64,0,1,fp8,fp8,0,0.016528000434239704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,128,1,float16,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,64,0,1,float16,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,128,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,128,1,fp8,fp8,0,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,128,1,float16,float16,0,0.05596800148487091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,0,1,float16,float16,0,0.058143998185793556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,128,1,float16,fp8,0,0.05675200124581655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,128,1,fp8,fp8,0,0.05398400127887726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,0,1,float16,fp8,0,0.058058664202690125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,64,0,1,fp8,fp8,0,0.05453866720199585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,128,1,float16,float16,0,0.05795200169086456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,0,1,float16,float16,0,0.058287998040517174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,128,1,fp8,fp8,0,0.0543039987484614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,128,1,float16,fp8,0,0.058575997749964394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,0,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,64,0,1,fp8,fp8,0,0.054383998115857445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,128,1,float16,float16,0,0.05806399881839752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,0,1,float16,float16,0,0.058090666929880776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,128,1,float16,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,128,1,fp8,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,0,1,float16,fp8,0,0.058101331194241844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,64,0,1,fp8,fp8,0,0.05566399792830149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,128,1,float16,float16,0,0.03587199995915095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,128,1,float16,fp8,0,0.033786666889985405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,0,1,float16,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,128,1,fp8,fp8,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,64,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,128,1,float16,float16,0,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,0,1,float16,float16,0,0.03573866685231527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,128,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,128,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,64,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,128,1,float16,float16,0,0.03401600072781245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,0,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,128,1,float16,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,128,1,fp8,fp8,0,0.03332266708215078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,0,1,float16,fp8,0,0.03555733213822047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,64,0,1,fp8,fp8,0,0.033333333830038704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,128,1,float16,float16,0,0.034645333886146545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,0,1,float16,float16,0,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,128,1,float16,fp8,0,0.033701332906881966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,128,1,fp8,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,0,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,128,1,float16,float16,0,0.02498133232196172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,64,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,0,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,64,0,1,fp8,fp8,0,0.025120000044504803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,128,1,float16,float16,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,128,1,float16,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,128,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,64,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,0,1,float16,float16,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,128,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,128,1,float16,float16,0,0.024362665911515553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,64,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,128,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,128,1,float16,float16,0,0.02422933280467987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,128,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,64,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,128,1,float16,float16,0,0.020106667031844456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,128,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,128,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,128,1,float16,float16,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,64,0,1,float16,fp8,0,0.02013333390156428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,0,1,float16,float16,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,128,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,128,1,fp8,fp8,0,0.018874666343132656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,64,0,1,float16,fp8,0,0.01969066634774208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,128,1,float16,float16,0,0.01841066653529803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,128,1,float16,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,128,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,64,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,128,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,128,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,0,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,128,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,64,0,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,0,1,float16,float16,0,0.016010666886965435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,128,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,128,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,128,1,float16,float16,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,128,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,128,1,fp8,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,128,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,64,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,0,1,float16,float16,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,128,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,128,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,0,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,64,0,1,fp8,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,64,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,0,1,float16,float16,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,128,1,float16,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,0,1,float16,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,128,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,128,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,128,1,fp8,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,128,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,0,1,float16,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,128,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,128,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,64,0,1,fp8,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,128,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,0,1,float16,float16,0,0.015834666788578033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,64,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,128,1,float16,float16,0,0.050250664353370667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,0,1,float16,float16,0,0.05180799961090088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,128,1,float16,fp8,0,0.05091733237107595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,128,1,fp8,fp8,0,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,0,1,float16,fp8,0,0.05212266743183136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,128,1,float16,float16,0,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,64,0,1,fp8,fp8,0,0.0487360010544459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,0,1,float16,float16,0,0.05223466455936432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,128,1,float16,fp8,0,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,128,1,fp8,fp8,0,0.04857600231965383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,0,1,float16,fp8,0,0.05018133421738943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,64,0,1,fp8,fp8,0,0.04778666794300079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,0,1,float16,float16,0,0.05029866596062978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,128,1,float16,float16,0,0.05193066596984863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,128,1,float16,fp8,0,0.051829333106676735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,128,1,fp8,fp8,0,0.04842666784922282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,0,1,float16,fp8,0,0.05039466420809428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,64,0,1,fp8,fp8,0,0.04877333343029022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,128,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,0,1,float16,float16,0,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,128,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,128,1,fp8,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,0,1,float16,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,64,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,128,1,float16,float16,0,0.03156800071398417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,0,1,float16,float16,0,0.030981334547201794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,128,1,float16,fp8,0,0.030042665700117748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,128,1,fp8,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,0,1,float16,fp8,0,0.03203733265399933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,64,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,128,1,float16,fp8,0,0.03029866764942805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,128,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,0,1,float16,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,64,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,128,1,float16,float16,0,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,0,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,128,1,float16,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,128,1,fp8,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,0,1,float16,fp8,0,0.03182933231194814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,64,0,1,fp8,fp8,0,0.03141866624355316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,128,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,128,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,128,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,128,1,float16,float16,0,0.02347733328739802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,0,1,float16,float16,0,0.022469334304332733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,64,0,1,fp8,fp8,0,0.02367466688156128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,128,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,128,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,128,1,float16,float16,0,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,64,0,1,fp8,fp8,0,0.021759999295075733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,128,1,float16,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,128,1,fp8,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,64,0,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,128,1,float16,float16,0,0.022629333039124806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,128,1,float16,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,128,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,128,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,64,0,1,fp8,fp8,0,0.02214933435122172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,128,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,0,1,float16,fp8,0,0.018122666825850803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,128,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,128,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,128,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,64,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,128,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,128,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,64,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,128,1,float16,float16,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,128,1,float16,fp8,0,0.017994667092959087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,128,1,fp8,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,64,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,128,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,128,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,128,1,fp8,fp8,0,0.016607999801635742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,64,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,64,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,128,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,128,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,128,1,float16,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,0,1,float16,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,128,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,128,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,0,1,float16,float16,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,128,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,128,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,128,1,float16,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,128,1,fp8,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,128,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,128,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,128,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,64,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,128,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,128,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,0,1,float16,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,64,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,128,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,128,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,0,1,float16,float16,0,0.014682666709025701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,128,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,64,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,128,1,float16,float16,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,128,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,64,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,128,1,float16,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,64,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,0,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,128,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,128,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,0,0.04582933088143667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,0,0.04562666515509287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,0,0.04604800045490265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,128,1,fp8,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,64,0,1,fp8,fp8,0,0.04211199780305227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,0,0.045824001232783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,128,1,fp8,fp8,0,0.041749333341916404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,0,0.04632000128428141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,64,0,1,fp8,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,0,0.04611733555793762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,0,0.04399466514587402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,0,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,128,1,fp8,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,64,0,1,fp8,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,128,1,float16,float16,0,0.029866665601730347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,128,1,float16,fp8,0,0.029215998947620392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,128,1,fp8,fp8,0,0.02802666773398717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,64,0,1,fp8,fp8,0,0.02775999903678894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,128,1,fp8,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,64,0,1,fp8,fp8,0,0.0278613343834877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,0,0.029653333127498627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,128,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,64,0,1,fp8,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,128,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,64,0,1,fp8,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,128,1,float16,fp8,0,0.022789334257443745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,128,1,fp8,fp8,0,0.021946666141351063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,0,0.022287999590237934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,128,1,fp8,fp8,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,0,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,128,1,fp8,fp8,0,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,64,0,1,fp8,fp8,0,0.022122666239738464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,0,0.022272000710169475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,128,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,128,1,float16,float16,0,0.018405333161354065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,128,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,128,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,64,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,128,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,64,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,0,0.01844800015290578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,64,0,1,fp8,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,128,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,64,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,128,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,64,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,128,1,fp8,fp8,0,0.016794666647911072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,64,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,128,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,64,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,0,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,64,0,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,128,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,128,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,64,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,128,1,float16,float16,0,0.013967999567588171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,128,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,0,0.015722667177518208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,0,0.014202666779359182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,128,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,128,1,fp8,fp8,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,128,1,float16,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,128,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,64,0,1,fp8,fp8,0,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,64,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,128,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,float16,0,0.22066134214401245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,128,1,float16,fp8,0,0.22079465786616007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,128,1,fp8,fp8,0,0.21332800388336182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,float16,0,1.151962677637736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,float16,0,0.23110934098561606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,0,1,float16,fp8,0,1.155344009399414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,64,0,1,fp8,fp8,0,1.0091893672943115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,128,1,float16,fp8,0,0.23344000180562338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,128,1,fp8,fp8,0,0.2249280015627543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,float16,0,1.164394696553548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,float16,0,0.13428800304730734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,0,1,float16,fp8,0,1.1694186528523762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,64,0,1,fp8,fp8,0,1.0208160082499187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,128,1,float16,fp8,0,0.13818132877349854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,float16,0,0.6440213521321615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,128,1,fp8,fp8,0,0.13404267032941183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,float16,0,0.1185706655184428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,0,1,fp8,fp8,0,0.5685813426971436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,64,0,1,float16,fp8,0,0.6514666477839152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,128,1,float16,fp8,0,0.12100266416867574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,float16,0,0.6287680069605509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,128,1,fp8,fp8,0,0.11574400464693706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,float16,0,0.12369599938392639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,0,1,float16,fp8,0,0.6300319830576578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,64,0,1,fp8,fp8,0,0.548960010210673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,128,1,float16,fp8,0,0.12545599540074667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,float16,0,0.6308693488438925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,128,1,fp8,fp8,0,0.12398399909337361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,float16,0,0.0792746643225352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,0,1,float16,fp8,0,0.6344426472981771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,64,0,1,fp8,fp8,0,0.5578879912694296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,128,1,float16,fp8,0,0.08065066734949748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,float16,0,0.3730613390604655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,128,1,fp8,fp8,0,0.0806826651096344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,float16,0,0.07313066720962524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,0,1,float16,fp8,0,0.37717334429423016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,64,0,1,fp8,fp8,0,0.33075199524561566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,float16,0,0.3666773239771525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,128,1,float16,fp8,0,0.07437866429487865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,128,1,fp8,fp8,0,0.07231999933719635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,0,1,float16,fp8,0,0.36869335174560547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,float16,0,0.07487999896208446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,64,0,1,fp8,fp8,0,0.3190026680628459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,float16,0,0.369648019472758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,128,1,float16,fp8,0,0.07645333309968312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,128,1,fp8,fp8,0,0.07481599847475688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,float16,0,0.060362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,0,1,float16,fp8,0,0.3701333204905192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,64,0,1,fp8,fp8,0,0.32250134150187176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,float16,0,0.25067732731501263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,128,1,float16,fp8,0,0.05985599756240845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,128,1,fp8,fp8,0,0.05817066629727682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,0,1,fp8,fp8,0,0.21944000323613486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,64,0,1,float16,fp8,0,0.24941867589950562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,float16,0,0.06005333364009857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,float16,0,0.24887466430664062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,128,1,float16,fp8,0,0.060592000683148704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,128,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,0,1,float16,fp8,0,0.2508053382237752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,64,0,1,fp8,fp8,0,0.21805334091186523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,float16,0,0.2508693337440491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,128,1,float16,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,128,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,0,1,fp8,fp8,0,0.21809599796930948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,64,0,1,float16,fp8,0,0.25091199080149335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,float16,0,0.1678559978802999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,128,1,float16,fp8,0,0.17054933309555054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,128,1,fp8,fp8,0,0.16292799512545267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,float16,0,0.7019680341084799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,0,1,fp8,fp8,0,0.6185280084609985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,float16,0,0.17534933487574259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,64,0,1,float16,fp8,0,0.7040639718373617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,128,1,float16,fp8,0,0.17813332875569662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,128,1,fp8,fp8,0,0.17297067244847616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,float16,0,0.7119413216908773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,float16,0,0.10470400253931682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,0,1,fp8,fp8,0,0.6290880044301351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,64,0,1,float16,fp8,0,0.7131733099619547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,128,1,float16,fp8,0,0.10745599865913391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,128,1,fp8,fp8,0,0.10702932874361674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,float16,0,0.40406401952107746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,float16,0,0.09352533022562663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,0,1,float16,fp8,0,0.4066026608149211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,64,0,1,fp8,fp8,0,0.3597866694132487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,128,1,float16,fp8,0,0.09492799639701843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,128,1,fp8,fp8,0,0.09090133508046468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,float16,0,0.3915040095647176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,0,1,fp8,fp8,0,0.3397386471430461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,float16,0,0.09724799791971843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,64,0,1,float16,fp8,0,0.39057600498199463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,128,1,float16,fp8,0,0.09939733147621155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,128,1,fp8,fp8,0,0.09551999966303508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,float16,0,0.39158399899800617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,float16,0,0.062224000692367554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,0,1,fp8,fp8,0,0.3487093448638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,64,0,1,float16,fp8,0,0.39587732156117755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,128,1,float16,fp8,0,0.06419733166694641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,128,1,fp8,fp8,0,0.06271466612815857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,float16,0,0.24317334095637003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,0,1,float16,fp8,0,0.2430079976717631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,float16,0,0.05885333319505056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,64,0,1,fp8,fp8,0,0.21321600675582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,128,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,float16,0,0.23894399404525757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,128,1,fp8,fp8,0,0.05756799876689911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,0,1,float16,fp8,0,0.23869333664576212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,64,0,1,fp8,fp8,0,0.2079733411471049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,float16,0,0.06113600234190623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,float16,0,0.23690134286880493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,128,1,float16,fp8,0,0.06051200131575266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,128,1,fp8,fp8,0,0.05829333265622457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,0,1,float16,fp8,0,0.24090667565663657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,float16,0,0.05036800106366476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,64,0,1,fp8,fp8,0,0.2093600034713745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,float16,0,0.16668800512949625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,128,1,fp8,fp8,0,0.05041066805521647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,128,1,float16,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,0,1,float16,fp8,0,0.16473066806793213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,64,0,1,fp8,fp8,0,0.14616533120473227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,float16,0,0.05159999926884969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,float16,0,0.16541866461435953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,128,1,float16,fp8,0,0.05007466673851013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,128,1,fp8,fp8,0,0.049914668003718056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,0,1,float16,fp8,0,0.16620799899101257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,64,0,1,fp8,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,float16,0,0.052111998200416565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,float16,0,0.166703999042511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,128,1,float16,fp8,0,0.05236266553401947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,128,1,fp8,fp8,0,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,0,1,float16,fp8,0,0.16519999504089355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,64,0,1,fp8,fp8,0,0.1463466684023539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,float16,0,0.14011200269063315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,128,1,float16,fp8,0,0.1416266659895579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,128,1,fp8,fp8,0,0.13822933038075766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,float16,0,0.5204213460286459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,0,1,float16,fp8,0,0.5184693336486816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,float16,0,0.14681067069371542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,64,0,1,fp8,fp8,0,0.45790934562683105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,128,1,float16,fp8,0,0.14903466900189719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,128,1,fp8,fp8,0,0.14538666605949402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,float16,0,0.5256799856821696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,float16,0,0.08874133229255676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,0,1,float16,fp8,0,0.5275786717732748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,float16,0,0.30235199133555096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,64,0,1,fp8,fp8,0,0.4659946759541829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,128,1,float16,fp8,0,0.09146666526794434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,128,1,fp8,fp8,0,0.09116799632708232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,0,1,float16,fp8,0,0.3054986596107483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,float16,0,0.08054933448632558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,64,0,1,fp8,fp8,0,0.2719893256823222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,float16,0,0.2934239904085795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,128,1,float16,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,128,1,fp8,fp8,0,0.07874666651089986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,0,1,float16,fp8,0,0.2951093316078186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,64,0,1,fp8,fp8,0,0.2569013237953186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,float16,0,0.08297599852085114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,float16,0,0.29586132367451984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,128,1,float16,fp8,0,0.08409600456555684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,128,1,fp8,fp8,0,0.08169599870840709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,0,1,float16,fp8,0,0.29810667037963867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,64,0,1,fp8,fp8,0,0.25965332984924316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,float16,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,float16,0,0.18915732701619467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,128,1,float16,fp8,0,0.058490668733914696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,128,1,fp8,fp8,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,0,1,float16,fp8,0,0.19127466281255087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,float16,0,0.05385600030422211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,64,0,1,fp8,fp8,0,0.16778133312861124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,128,1,float16,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,128,1,fp8,fp8,0,0.05229333539803823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,float16,0,0.1874986688296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,float16,0,0.05435200035572052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,0,1,float16,fp8,0,0.18758400281270346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,64,0,1,fp8,fp8,0,0.16315199931462607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,128,1,float16,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,128,1,fp8,fp8,0,0.053183997670809426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,float16,0,0.18754667043685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,0,1,fp8,fp8,0,0.16454933087031046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,float16,0,0.046021332343419395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,64,0,1,float16,fp8,0,0.18777066469192505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,128,1,float16,fp8,0,0.04626133541266123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,128,1,fp8,fp8,0,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,float16,0,0.12615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,float16,0,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,0,1,fp8,fp8,0,0.11156266927719116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,64,0,1,float16,fp8,0,0.12612266341845194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,128,1,float16,fp8,0,0.04699199895064036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,128,1,fp8,fp8,0,0.046112000942230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,float16,0,0.12749333182970682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,0,1,fp8,fp8,0,0.11168000102043152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,float16,0,0.04606399933497111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,64,0,1,float16,fp8,0,0.12784000237782797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,128,1,float16,fp8,0,0.04605866471926371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,128,1,fp8,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,float16,0,0.12597866853078207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,0,1,fp8,fp8,0,0.1116426686445872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,64,0,1,float16,fp8,0,0.1277653376261393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,float16,0,0.21532267332077026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,128,1,float16,fp8,0,0.21621867020924887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,128,1,fp8,fp8,0,0.2078933318456014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,float16,0,0.6722453435262045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,0,1,float16,fp8,0,0.6707946459452311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,float16,0,0.22734934091567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,64,0,1,fp8,fp8,0,0.5907466808954874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,128,1,float16,fp8,0,0.22992533445358276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,128,1,fp8,fp8,0,0.22021865844726562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,float16,0,0.6813706556955973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,float16,0,0.130213330189387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,0,1,fp8,fp8,0,0.6043093204498291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,64,0,1,float16,fp8,0,0.6857013702392578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,float16,0,0.3791573445002238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,128,1,float16,fp8,0,0.13297067085901895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,128,1,fp8,fp8,0,0.12819733222325644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,0,1,float16,fp8,0,0.38204801082611084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,64,0,1,fp8,fp8,0,0.3397759993871053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,float16,0,0.11179733276367188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,float16,0,0.3592746655146281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,128,1,float16,fp8,0,0.11359467109044392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,128,1,fp8,fp8,0,0.11160533626874287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,0,1,float16,fp8,0,0.3625653187433879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,float16,0,0.11795733372370402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,64,0,1,fp8,fp8,0,0.3211200038592021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,float16,0,0.36555198828379315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,128,1,float16,fp8,0,0.12001066406567891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,128,1,fp8,fp8,0,0.11783466736475627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,float16,0,0.07247466842333476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,0,1,float16,fp8,0,0.36826666196187335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,64,0,1,fp8,fp8,0,0.32705599069595337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,float16,0,0.21698667605717978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,128,1,float16,fp8,0,0.0746666689713796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,128,1,fp8,fp8,0,0.0759093314409256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,0,1,float16,fp8,0,0.21955200036366782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,64,0,1,fp8,fp8,0,0.19538132349650064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,float16,0,0.0662720004717509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,float16,0,0.21151467164357504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,128,1,float16,fp8,0,0.06735466420650482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,128,1,fp8,fp8,0,0.06464533507823944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,0,1,float16,fp8,0,0.21201600631078085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,64,0,1,fp8,fp8,0,0.18573866287867227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,float16,0,0.068122665087382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,float16,0,0.21220799287160239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,128,1,float16,fp8,0,0.06922666728496552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,128,1,fp8,fp8,0,0.06796266635258992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,0,1,float16,fp8,0,0.21366933981577554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,float16,0,0.04582933088143667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,64,0,1,fp8,fp8,0,0.18942399819691977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,float16,0,0.1400266687075297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,128,1,float16,fp8,0,0.04647466540336609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,128,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,0,1,float16,fp8,0,0.14229333400726318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,64,0,1,fp8,fp8,0,0.12435733278592427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,float16,0,0.044031997521718345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,float16,0,0.1381706694761912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,128,1,float16,fp8,0,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,128,1,fp8,fp8,0,0.042223999897638954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,0,1,float16,fp8,0,0.13909866412480673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,64,0,1,fp8,fp8,0,0.12193600336710612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,float16,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,float16,0,0.13806399703025818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,128,1,float16,fp8,0,0.045738667249679565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,128,1,fp8,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,0,1,float16,fp8,0,0.1400159994761149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,64,0,1,fp8,fp8,0,0.12361066540082295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,float16,0,0.037461332976818085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,float16,0,0.09930666287740071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,128,1,float16,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,128,1,fp8,fp8,0,0.0359946663180987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,0,1,float16,fp8,0,0.09914666414260864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,64,0,1,fp8,fp8,0,0.08890666564305623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,float16,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,float16,0,0.09935999910036723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,128,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,128,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,0,1,float16,fp8,0,0.10033599535624187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,64,0,1,fp8,fp8,0,0.08893332878748576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,128,1,float16,fp8,0,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,float16,0,0.09937066833178203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,128,1,fp8,fp8,0,0.03524799893299738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,0,1,fp8,fp8,0,0.08942400415738423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,64,0,1,float16,fp8,0,0.09912000099817912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,float16,0,0.16715733210245767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,128,1,float16,fp8,0,0.16784000396728516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,128,1,fp8,fp8,0,0.16224533319473267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,float16,0,0.4229280153910319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,float16,0,0.1755519906679789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,0,1,float16,fp8,0,0.42362133661905926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,64,0,1,fp8,fp8,0,0.3800693353017171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,128,1,float16,fp8,0,0.17574934164683023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,128,1,fp8,fp8,0,0.17307732502619425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,float16,0,0.43265068531036377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,float16,0,0.10147733489672343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,0,1,float16,fp8,0,0.4338239828745524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,64,0,1,fp8,fp8,0,0.3911199967066447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,float16,0,0.24591465791066489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,128,1,float16,fp8,0,0.10345600048700969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,128,1,fp8,fp8,0,0.1032533347606659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,0,1,float16,fp8,0,0.247216006120046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,64,0,1,fp8,fp8,0,0.2230293353398641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,float16,0,0.08933333555857341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,128,1,float16,fp8,0,0.09074133634567261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,128,1,fp8,fp8,0,0.08679466446240743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,float16,0,0.23036799828211466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,float16,0,0.09146666526794434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,0,1,fp8,fp8,0,0.20570133129755655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,64,0,1,float16,fp8,0,0.2339199980099996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,128,1,float16,fp8,0,0.09411199887593587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,128,1,fp8,fp8,0,0.0930560032526652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,float16,0,0.23389333486557007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,float16,0,0.05789866546789805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,0,1,fp8,fp8,0,0.21234132846196493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,float16,0,0.14443199833234152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,64,0,1,float16,fp8,0,0.23707199096679688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,128,1,float16,fp8,0,0.06009600063165029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,128,1,fp8,fp8,0,0.058304001887639366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,0,1,float16,fp8,0,0.14657066265741983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,64,0,1,fp8,fp8,0,0.13006933530171713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,float16,0,0.0525546669960022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,float16,0,0.13960533340771994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,128,1,float16,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,128,1,fp8,fp8,0,0.05231999854246775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,0,1,float16,fp8,0,0.14037866393725076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,float16,0,0.05637866755326589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,64,0,1,fp8,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,float16,0,0.14229333400726318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,128,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,128,1,fp8,fp8,0,0.05425600210825602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,0,1,float16,fp8,0,0.14341866970062256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,float16,0,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,64,0,1,fp8,fp8,0,0.125791996717453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,float16,0,0.09709333380063374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,128,1,float16,fp8,0,0.0422986646493276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,128,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,0,1,float16,fp8,0,0.09730133414268494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,float16,0,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,64,0,1,fp8,fp8,0,0.08524266878763835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,float16,0,0.0946613351504008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,128,1,float16,fp8,0,0.03997333347797394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,128,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,0,1,float16,fp8,0,0.0936853289604187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,64,0,1,fp8,fp8,0,0.08381332953770955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,float16,0,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,float16,0,0.09425066908200581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,128,1,float16,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,128,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,0,1,float16,fp8,0,0.09512533744176228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,64,0,1,fp8,fp8,0,0.08472533027331035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,float16,0,0.031744000812371574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,float16,0,0.07825066645940144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,128,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,128,1,fp8,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,0,1,float16,fp8,0,0.07878933350245158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,64,0,1,fp8,fp8,0,0.06870933373769124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,float16,0,0.07859200239181519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,128,1,float16,fp8,0,0.03323200096686681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,128,1,fp8,fp8,0,0.03146133323510488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,0,1,float16,fp8,0,0.07890133559703827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,64,0,1,fp8,fp8,0,0.07073600093523662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,float16,0,0.031471999982992806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,float16,0,0.07858133316040039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,128,1,fp8,fp8,0,0.031173333525657654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,0,1,float16,fp8,0,0.07868800063927968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,64,0,1,fp8,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,float16,0,0.2211946646372477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,128,1,float16,fp8,0,0.22223466634750366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,float16,0,0.4318079948425293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,128,1,fp8,fp8,0,0.21226133902867636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,0,1,float16,fp8,0,0.4312800168991089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,float16,0,0.23482666412989298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,64,0,1,fp8,fp8,0,0.3895039955774943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,128,1,float16,fp8,0,0.23553599913915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,128,1,fp8,fp8,0,0.22509866952896118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,float16,0,0.4445226589838664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,float16,0,0.13090667128562927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,0,1,fp8,fp8,0,0.40166934331258136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,64,0,1,float16,fp8,0,0.4480266571044922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,float16,0,0.24874667326609293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,128,1,float16,fp8,0,0.13197333614031473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,128,1,fp8,fp8,0,0.12874666849772134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,0,1,float16,fp8,0,0.25086400906244916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,float16,0,0.11103999614715576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,64,0,1,fp8,fp8,0,0.22697067260742188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,128,1,float16,fp8,0,0.11150399843851726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,float16,0,0.22628267606099448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,128,1,fp8,fp8,0,0.1116480032602946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,0,1,float16,fp8,0,0.23002133766810098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,float16,0,0.11779200037320454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,64,0,1,fp8,fp8,0,0.20775467157363892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,128,1,float16,fp8,0,0.11893332997957866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,128,1,fp8,fp8,0,0.1176533301671346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,float16,0,0.2342133323351542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,float16,0,0.07053333520889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,0,1,float16,fp8,0,0.23630932966868082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,64,0,1,fp8,fp8,0,0.2153973380724589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,float16,0,0.13805866241455078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,128,1,float16,fp8,0,0.07272000114123027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,128,1,fp8,fp8,0,0.0728053351243337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,float16,0,0.06438399851322174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,0,1,float16,fp8,0,0.1392586628595988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,64,0,1,fp8,fp8,0,0.129120002190272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,128,1,float16,fp8,0,0.06532266736030579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,float16,0,0.13217600186665854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,128,1,fp8,fp8,0,0.06258666515350342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,float16,0,0.06664533416430156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,0,1,fp8,fp8,0,0.11723732948303223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,64,0,1,float16,fp8,0,0.1337386667728424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,128,1,float16,fp8,0,0.06659199794133504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,128,1,fp8,fp8,0,0.06615466872851054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,float16,0,0.13209600249926248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,float16,0,0.04199466605981191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,float16,0,0.08684800068537395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,0,1,fp8,fp8,0,0.1197760005791982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,64,0,1,float16,fp8,0,0.13474133610725403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,128,1,float16,fp8,0,0.0440533310174942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,128,1,fp8,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,0,1,float16,fp8,0,0.08915199836095174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,64,0,1,fp8,fp8,0,0.07945600152015686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,float16,0,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,float16,0,0.08492799599965413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,128,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,128,1,fp8,fp8,0,0.0391839991013209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,0,1,float16,fp8,0,0.0867786705493927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,64,0,1,fp8,fp8,0,0.07696533203125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,float16,0,0.0860533316930135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,128,1,float16,fp8,0,0.042693331837654114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,128,1,fp8,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,0,1,float16,fp8,0,0.0876533289750417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,float16,0,0.029792000850041706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,float16,0,0.059903999169667564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,64,0,1,fp8,fp8,0,0.07689066727956136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,128,1,float16,fp8,0,0.03128000100453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,128,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,0,1,float16,fp8,0,0.060565332571665444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,64,0,1,fp8,fp8,0,0.05443733433882395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,float16,0,0.060191998879114784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,128,1,float16,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,128,1,fp8,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,0,1,float16,fp8,0,0.05861866474151611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,64,0,1,fp8,fp8,0,0.051882664362589516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,float16,0,0.05993066728115082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,128,1,float16,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,128,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,0,1,float16,fp8,0,0.0583840012550354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,64,0,1,fp8,fp8,0,0.05423999826113383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,float16,0,0.056943997740745544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,128,1,float16,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,128,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,0,1,float16,fp8,0,0.05635199944178263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,64,0,1,fp8,fp8,0,0.05169600248336792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,float16,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,128,1,float16,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,128,1,fp8,fp8,0,0.027087998886903126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,0,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,64,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,float16,0,0.027477333943049114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,float16,0,0.05843733251094818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,128,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,128,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,0,1,float16,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,64,0,1,fp8,fp8,0,0.05235200126965841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,float16,0,0.16978667179743448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,float16,0,0.2839786609013875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,128,1,float16,fp8,0,0.17079466581344604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,128,1,fp8,fp8,0,0.1637226641178131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,0,1,float16,fp8,0,0.2858133316040039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,64,0,1,fp8,fp8,0,0.2579306761423747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,float16,0,0.17828265825907388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,128,1,float16,fp8,0,0.17775466044743857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,float16,0,0.29360000292460126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,128,1,fp8,fp8,0,0.17268800735473633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,float16,0,0.10085866848627727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,0,1,float16,fp8,0,0.29364800453186035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,64,0,1,fp8,fp8,0,0.2701759934425354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,float16,0,0.16674133141835532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,128,1,float16,fp8,0,0.10300800204277039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,128,1,fp8,fp8,0,0.10341333349545796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,0,1,float16,fp8,0,0.16894932587941489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,64,0,1,fp8,fp8,0,0.1567466656366984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,float16,0,0.08708799878756206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,float16,0,0.15185067057609558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,128,1,float16,fp8,0,0.08879466851552327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,128,1,fp8,fp8,0,0.08494399984677632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,0,1,float16,fp8,0,0.15268266201019287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,float16,0,0.09107733766237895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,64,0,1,fp8,fp8,0,0.1379039982954661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,float16,0,0.15460800131162009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,128,1,float16,fp8,0,0.09308266639709473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,128,1,fp8,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,float16,0,0.054042667150497437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,0,1,float16,fp8,0,0.15686933199564615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,64,0,1,fp8,fp8,0,0.14638400077819824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,float16,0,0.09341866771380107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,128,1,float16,fp8,0,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,128,1,fp8,fp8,0,0.05784533421198527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,0,1,float16,fp8,0,0.09701333443323772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,64,0,1,fp8,fp8,0,0.08925333619117737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,float16,0,0.05197866757710775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,128,1,float16,fp8,0,0.054010664423306785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,float16,0,0.09117866555849712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,128,1,fp8,fp8,0,0.05201066533724467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,0,1,float16,fp8,0,0.09296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,float16,0,0.05330666899681091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,64,0,1,fp8,fp8,0,0.0825493335723877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,128,1,fp8,fp8,0,0.05195199946562449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,128,1,float16,fp8,0,0.05426666637261709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,float16,0,0.09121599793434143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,float16,0,0.037733333806196846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,0,1,float16,fp8,0,0.09300800164540608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,64,0,1,fp8,fp8,0,0.08508267005284627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,float16,0,0.062208001812299095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,128,1,float16,fp8,0,0.03827733298142751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,128,1,fp8,fp8,0,0.038160001238187156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,0,1,float16,fp8,0,0.0637066662311554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,64,0,1,fp8,fp8,0,0.05820799867312113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,float16,0,0.062074666221936546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,float16,0,0.0364533339937528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,128,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,128,1,fp8,fp8,0,0.0352906659245491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,0,1,float16,fp8,0,0.06226666768391927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,float16,0,0.03770133356253306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,64,0,1,fp8,fp8,0,0.05515199899673462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,float16,0,0.06058666606744131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,128,1,float16,fp8,0,0.03811199963092804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,128,1,fp8,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,0,1,fp8,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,64,0,1,float16,fp8,0,0.06333866715431213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,float16,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,float16,0,0.048122664292653404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,128,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,0,1,float16,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,64,0,1,fp8,fp8,0,0.044794668753941856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,float16,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,float16,0,0.047824000318845115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,128,1,float16,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,128,1,fp8,fp8,0,0.026837334036827087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,64,0,1,fp8,fp8,0,0.04409066836039225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,float16,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,float16,0,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,128,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,128,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,0,1,float16,fp8,0,0.04975466430187225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,64,0,1,fp8,fp8,0,0.04382933179537455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,float16,0,0.045754666129748024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,128,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,128,1,fp8,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,0,1,float16,fp8,0,0.04601066807905833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,float16,0,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,64,0,1,fp8,fp8,0,0.04161066561937332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,float16,0,0.04593066871166229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,128,1,float16,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,128,1,fp8,fp8,0,0.02384000023206075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,0,1,float16,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,64,0,1,fp8,fp8,0,0.04166933397452036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,float16,0,0.04618666569391886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,float16,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,128,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,128,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,0,1,float16,fp8,0,0.0483893354733785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,64,0,1,fp8,fp8,0,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,float16,0,0.22613332668940225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,128,1,float16,fp8,0,0.22492265701293945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,float16,0,0.31458133459091187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,128,1,fp8,fp8,0,0.2143626610438029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,0,1,float16,fp8,0,0.31293867031733197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,64,0,1,fp8,fp8,0,0.2871359984079997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,float16,0,0.2372693419456482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,float16,0,0.3278239965438843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,128,1,float16,fp8,0,0.23985600471496582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,128,1,fp8,fp8,0,0.22392000754674277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,0,1,float16,fp8,0,0.3274719913800557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,float16,0,0.1307199994723002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,64,0,1,fp8,fp8,0,0.29870933294296265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,128,1,float16,fp8,0,0.13272533814112344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,float16,0,0.18033599853515625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,128,1,fp8,fp8,0,0.13186132907867432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,0,1,float16,fp8,0,0.18317866325378418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,float16,0,0.1106719970703125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,64,0,1,fp8,fp8,0,0.17105066776275635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,float16,0,0.15899200240770975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,128,1,float16,fp8,0,0.11380799611409505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,128,1,fp8,fp8,0,0.11229866743087769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,float16,0,0.11807466546694438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,0,1,float16,fp8,0,0.16125333309173584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,64,0,1,fp8,fp8,0,0.1530080040295919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,128,1,float16,fp8,0,0.12085866928100586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,128,1,fp8,fp8,0,0.11935999989509583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,float16,0,0.1686826745669047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,float16,0,0.07057600220044453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,0,1,float16,fp8,0,0.17017600933710733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,64,0,1,fp8,fp8,0,0.15956800182660422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,float16,0,0.09707199533780415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,128,1,float16,fp8,0,0.07428800066312154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,128,1,fp8,fp8,0,0.0726506660381953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,0,1,float16,fp8,0,0.10089066624641418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,64,0,1,fp8,fp8,0,0.09542933106422424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,float16,0,0.06467199822266896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,float16,0,0.09106666843096416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,128,1,float16,fp8,0,0.06645333270231883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,128,1,fp8,fp8,0,0.06253333389759064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,0,1,float16,fp8,0,0.09186133742332458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,64,0,1,fp8,fp8,0,0.08469866712888081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,float16,0,0.06649599969387054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,128,1,float16,fp8,0,0.06849599877993266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,float16,0,0.09290132919947307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,128,1,fp8,fp8,0,0.06649599969387054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,float16,0,0.0414986660083135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,float16,0,0.060234665870666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,0,1,fp8,fp8,0,0.08701333403587341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,64,0,1,float16,fp8,0,0.0960053304831187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,128,1,float16,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,128,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,0,1,float16,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,64,0,1,fp8,fp8,0,0.05813866853713989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,float16,0,0.04052799940109253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,128,1,float16,fp8,0,0.039749334255854286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,float16,0,0.06044266621271769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,128,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,0,1,float16,fp8,0,0.05912533402442932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,64,0,1,fp8,fp8,0,0.05445333321889242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,float16,0,0.04167466859022776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,float16,0,0.06228266656398773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,128,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,128,1,fp8,fp8,0,0.039642666776975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,0,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,64,0,1,fp8,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,float16,0,0.029232000311215717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,float16,0,0.041984001795450844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,128,1,float16,fp8,0,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,128,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,0,1,float16,fp8,0,0.04223466912905375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,64,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,float16,0,0.04206933577855428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,128,1,float16,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,128,1,fp8,fp8,0,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,64,0,1,fp8,fp8,0,0.038032000263532005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,float16,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,float16,0,0.04215466479460398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,128,1,float16,fp8,0,0.029658667743206024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,128,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,0,1,float16,fp8,0,0.041365332901477814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,64,0,1,fp8,fp8,0,0.03902400036652883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,float16,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,128,1,float16,fp8,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,128,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,0,1,float16,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,64,0,1,fp8,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,float16,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,float16,0,0.0378506655494372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,128,1,float16,fp8,0,0.024101334313551586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,128,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,0,1,float16,fp8,0,0.03761066744724909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,64,0,1,fp8,fp8,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,float16,0,0.03687999894221624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,128,1,float16,fp8,0,0.02478400121132533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,128,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,64,0,1,fp8,fp8,0,0.035546667873859406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,float16,0,0.035530666510264076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,128,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,128,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,0,1,float16,fp8,0,0.035887998839219414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,64,0,1,fp8,fp8,0,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,float16,0,0.0220266655087471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,float16,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,128,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,64,0,1,fp8,fp8,0,0.03224000086386999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,float16,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,128,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,128,1,fp8,fp8,0,0.022687998910744984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,64,0,1,fp8,fp8,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,float16,0,0.1879253387451172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,float16,0,0.23186665773391724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,128,1,float16,fp8,0,0.18944533665974936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,128,1,fp8,fp8,0,0.1812266707420349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,0,1,float16,fp8,0,0.23297599951426187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,64,0,1,fp8,fp8,0,0.21847999095916748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,float16,0,0.19499200582504272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,128,1,float16,fp8,0,0.19422399997711182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,float16,0,0.24220800399780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,128,1,fp8,fp8,0,0.18867733081181845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,0,1,float16,fp8,0,0.24016533295313516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,64,0,1,fp8,fp8,0,0.225765327612559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,float16,0,0.11002133289972942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,128,1,float16,fp8,0,0.11131733655929565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,float16,0,0.13723733027776083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,128,1,fp8,fp8,0,0.11185600360234578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,0,1,float16,fp8,0,0.13729066650072733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,64,0,1,fp8,fp8,0,0.1334986686706543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,float16,0,0.09315199653307597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,128,1,float16,fp8,0,0.09527466694513957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,128,1,fp8,fp8,0,0.08943466345469157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,float16,0,0.11766933401425679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,0,1,fp8,fp8,0,0.10820266604423523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,float16,0,0.09948266545931499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,64,0,1,float16,fp8,0,0.11905066172281902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,float16,0,0.1197706659634908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,128,1,float16,fp8,0,0.10052266716957092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,128,1,fp8,fp8,0,0.09787733356157939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,float16,0,0.06047466893990835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,0,1,float16,fp8,0,0.12350933750470479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,64,0,1,fp8,fp8,0,0.12125333150227864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,float16,0,0.07584000130494435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,128,1,float16,fp8,0,0.058415999015172325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,128,1,fp8,fp8,0,0.058592001597086586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,0,1,float16,fp8,0,0.07696000238259633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,64,0,1,fp8,fp8,0,0.07023466626803081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,float16,0,0.0544053316116333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,float16,0,0.07126933336257935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,128,1,float16,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,128,1,fp8,fp8,0,0.052101333936055504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,0,1,float16,fp8,0,0.07064533233642578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,float16,0,0.05633600056171417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,float16,0,0.0705973356962204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,128,1,float16,fp8,0,0.05840533475081126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,64,0,1,fp8,fp8,0,0.06523733337720235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,128,1,fp8,fp8,0,0.054117331902186074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,0,1,float16,fp8,0,0.07273066540559132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,64,0,1,fp8,fp8,0,0.06841066479682922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,float16,0,0.039461334546407066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,float16,0,0.04960533479849497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,128,1,float16,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,128,1,fp8,fp8,0,0.03774400055408478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,0,1,float16,fp8,0,0.04910933474699656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,float16,0,0.03749333322048187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,64,0,1,fp8,fp8,0,0.04619200030962626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,float16,0,0.04612799982229868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,128,1,float16,fp8,0,0.03824000060558319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,128,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,0,1,float16,fp8,0,0.04770133395989736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,64,0,1,fp8,fp8,0,0.04200000067551931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,float16,0,0.038549333810806274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,float16,0,0.0476746658484141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,128,1,float16,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,128,1,fp8,fp8,0,0.036176001032193504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,0,1,float16,fp8,0,0.04850666721661886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,64,0,1,fp8,fp8,0,0.0439573327700297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,float16,0,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,128,1,float16,fp8,0,0.027797333896160126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,128,1,fp8,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,0,1,float16,fp8,0,0.03614933292071024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,64,0,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,128,1,float16,fp8,0,0.026005332668622334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,0,1,float16,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,64,0,1,fp8,fp8,0,0.031914666295051575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,float16,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,128,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,128,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,0,1,float16,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,64,0,1,fp8,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,float16,0,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,128,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,128,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,0,1,float16,fp8,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,64,0,1,fp8,fp8,0,0.029701332251230877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,float16,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,float16,0,0.03136000037193298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,128,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,0,1,float16,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,64,0,1,fp8,fp8,0,0.028543998797734577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,float16,0,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,float16,0,0.031898667414983116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,128,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,128,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,0,1,float16,fp8,0,0.03188266605138779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,64,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,float16,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,128,1,float16,fp8,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,128,1,fp8,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,0,1,float16,fp8,0,0.030037333567937214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,64,0,1,fp8,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,float16,0,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,128,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,128,1,fp8,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,0,1,float16,fp8,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,64,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,float16,0,0.03136533250411352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,128,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,0,1,float16,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,64,0,1,fp8,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,float16,0,0.1901280085245768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,float16,0,0.21825067202250162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,128,1,float16,fp8,0,0.18747733036677042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,128,1,fp8,fp8,0,0.18172800540924072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,0,1,float16,fp8,0,0.21540266275405884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,64,0,1,fp8,fp8,0,0.20506133635838827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,float16,0,0.19399466117223105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,128,1,float16,fp8,0,0.1916159987449646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,float16,0,0.22290132443110147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,128,1,fp8,fp8,0,0.19290133317311606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,0,1,float16,fp8,0,0.22031466166178384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,64,0,1,fp8,fp8,0,0.2137706677118937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,float16,0,0.11105599999427795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,128,1,float16,fp8,0,0.1102133293946584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,float16,0,0.12851732969284058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,128,1,fp8,fp8,0,0.11174933115641277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,0,1,float16,fp8,0,0.12762133280436197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,64,0,1,fp8,fp8,0,0.12422933181126912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,float16,0,0.1011786659558614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,float16,0,0.11632532874743144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,128,1,float16,fp8,0,0.101200004418691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,128,1,fp8,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,0,1,fp8,fp8,0,0.10892800490061443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,float16,0,0.10498666763305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,64,0,1,float16,fp8,0,0.11893332997957866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,float16,0,0.12132267157236735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,128,1,float16,fp8,0,0.10321600238482158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,128,1,fp8,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,0,1,fp8,fp8,0,0.11733866731325786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,64,0,1,float16,fp8,0,0.12070399522781372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,float16,0,0.06058666606744131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,float16,0,0.07032000025113423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,128,1,float16,fp8,0,0.06051200131575266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,0,1,float16,fp8,0,0.06965866684913635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,128,1,fp8,fp8,0,0.0629066675901413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,64,0,1,fp8,fp8,0,0.06871466835339864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,float16,0,0.05866666634877523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,float16,0,0.06644266843795776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,128,1,float16,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,0,1,float16,fp8,0,0.06664533416430156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,128,1,fp8,fp8,0,0.05648000041643778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,64,0,1,fp8,fp8,0,0.06246933341026306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,float16,0,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,float16,0,0.06849599877993266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,128,1,float16,fp8,0,0.058693334460258484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,128,1,fp8,fp8,0,0.056799997886021934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,0,1,float16,fp8,0,0.06730666756629944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,float16,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,64,0,1,fp8,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,float16,0,0.046037331223487854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,128,1,float16,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,128,1,fp8,fp8,0,0.03758399933576584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,0,1,float16,fp8,0,0.04572799801826477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,64,0,1,fp8,fp8,0,0.0441599984963735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,float16,0,0.038058665891488395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,float16,0,0.04587199787298838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,128,1,float16,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,128,1,fp8,fp8,0,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,0,1,float16,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,64,0,1,fp8,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,float16,0,0.045226668318112694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,128,1,fp8,fp8,0,0.03730133424202601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,128,1,float16,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,0,1,float16,fp8,0,0.04508799811204275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,float16,0,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,64,0,1,fp8,fp8,0,0.04349866509437561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,128,1,float16,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,128,1,fp8,fp8,0,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,0,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,64,0,1,fp8,fp8,0,0.02932800104220708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,128,1,float16,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,128,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,0,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,64,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,128,1,float16,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,128,1,fp8,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,0,1,float16,fp8,0,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,64,0,1,fp8,fp8,0,0.028309332827727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,float16,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,float16,0,0.026554666459560394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,128,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,64,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,128,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,0,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,64,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,float16,0,0.021482666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,128,1,float16,fp8,0,0.022197333474953968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,128,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,64,0,1,fp8,fp8,0,0.024517332514127094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,128,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,128,1,fp8,fp8,0,0.02059200033545494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,64,0,1,fp8,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,float16,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,128,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,128,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,64,0,1,fp8,fp8,0,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,128,1,float16,fp8,0,0.02063999945918719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,128,1,fp8,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,0,1,float16,fp8,0,0.02384000023206075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,64,0,1,fp8,fp8,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,float16,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,128,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,64,0,1,float16,fp8,0,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,float16,0,0.024160000185171764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,128,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,0,1,float16,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,64,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,float16,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,float16,0,0.02421333392461141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,128,1,float16,fp8,0,0.01883200059334437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,128,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,64,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,0,1,float16,float16,0,0.18613332509994507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,128,1,float16,float16,0,0.1872640053431193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,128,1,float16,fp8,0,0.18301333983739218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,128,1,fp8,fp8,0,0.18054932355880737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,0,1,float16,fp8,0,0.1834239959716797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,64,0,1,fp8,fp8,0,0.1767680048942566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,128,1,float16,float16,0,0.18798933426539102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,128,1,float16,fp8,0,0.18533867597579956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,128,1,fp8,fp8,0,0.18505066633224487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,0,1,float16,float16,0,0.1906933387120565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,0,1,float16,fp8,0,0.1859040061632792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,128,1,float16,float16,0,0.10782399773597717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,0,1,float16,float16,0,0.11050132910410564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,64,0,1,fp8,fp8,0,0.1841119925181071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,128,1,float16,fp8,0,0.10547733306884766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,128,1,fp8,fp8,0,0.10876799623171489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,0,1,float16,fp8,0,0.10784000158309937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,64,0,1,fp8,fp8,0,0.10717333356539409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,0,1,float16,float16,0,0.09921066959698994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,128,1,float16,float16,0,0.10105599959691365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,128,1,float16,fp8,0,0.0995093286037445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,128,1,fp8,fp8,0,0.09513599673906963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,0,1,float16,fp8,0,0.10026133060455322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,64,0,1,fp8,fp8,0,0.09343467156092326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,128,1,float16,float16,0,0.10309867064158122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,0,1,float16,float16,0,0.10181867082913716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,128,1,float16,fp8,0,0.10107200344403584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,128,1,fp8,fp8,0,0.10236799716949463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,0,1,float16,fp8,0,0.10099732875823975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,128,1,float16,float16,0,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,64,0,1,fp8,fp8,0,0.10149866342544556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,0,1,float16,float16,0,0.05846933523813883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,128,1,float16,fp8,0,0.05834133426348368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,128,1,fp8,fp8,0,0.058143998185793556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,0,1,float16,fp8,0,0.05857066810131073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,64,0,1,fp8,fp8,0,0.058320000767707825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,128,1,float16,float16,0,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,0,1,float16,float16,0,0.058117335041364036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,128,1,float16,fp8,0,0.05607999861240387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,128,1,fp8,fp8,0,0.05461333195368449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,0,1,float16,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,64,0,1,fp8,fp8,0,0.05399466554323832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,128,1,float16,float16,0,0.05788266658782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,0,1,float16,float16,0,0.05657066901524862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,128,1,float16,fp8,0,0.056133334835370384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,128,1,fp8,fp8,0,0.05622933308283488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,0,1,float16,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,64,0,1,fp8,fp8,0,0.05470400055249532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,128,1,float16,float16,0,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,0,1,float16,float16,0,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,128,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,128,1,fp8,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,0,1,float16,fp8,0,0.03998400022586187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,64,0,1,fp8,fp8,0,0.03789333254098892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,128,1,float16,float16,0,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,0,1,float16,float16,0,0.03888533264398575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,128,1,float16,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,128,1,fp8,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,0,1,float16,fp8,0,0.03867733230193456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,64,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,128,1,float16,float16,0,0.037445334096749626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,0,1,float16,float16,0,0.03782399992148081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,128,1,float16,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,128,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,0,1,float16,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,64,0,1,fp8,fp8,0,0.03737599899371465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,128,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,128,1,float16,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,128,1,fp8,fp8,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,0,1,float16,fp8,0,0.025770666698614757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,64,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,128,1,float16,float16,0,0.025018667181332905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,0,1,float16,float16,0,0.02566933383544286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,128,1,float16,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,128,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,0,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,64,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,128,1,float16,float16,0,0.02569066733121872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,128,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,0,1,float16,fp8,0,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,128,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,64,128,1,fp8,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,0,1,float16,float16,0,0.02221333235502243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,128,1,float16,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,128,1,fp8,fp8,0,0.021727999051411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,0,1,float16,fp8,0,0.02294933299223582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,64,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,128,1,float16,float16,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,128,1,fp8,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,128,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,0,1,float16,fp8,0,0.022463999688625336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,64,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,128,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,128,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,128,1,fp8,fp8,0,0.021615999440352123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,0,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,64,0,1,fp8,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,0,1,float16,float16,0,0.020245333512624104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,128,1,float16,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,128,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,0,1,float16,fp8,0,0.019904000063737232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,64,0,1,fp8,fp8,0,0.019632000476121902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,0,1,float16,float16,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,128,1,float16,float16,0,0.02089066555102666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,128,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,128,1,float16,float16,0,0.01966933285196622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,64,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,0,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,128,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,0,1,float16,fp8,0,0.020448000480731327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,128,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,64,0,1,fp8,fp8,0,0.019653332730134327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,128,1,float16,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,128,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,64,0,1,fp8,fp8,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,128,1,float16,float16,0,0.019706666469573975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,128,1,float16,fp8,0,0.020021333048741024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,128,1,fp8,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,0,1,float16,fp8,0,0.02080533280968666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,64,0,1,fp8,fp8,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,128,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,128,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,0,1,float16,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,128,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,0,1,float16,float16,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,128,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,128,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,64,0,1,fp8,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,128,1,float16,float16,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,128,1,float16,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,128,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,0,1,float16,fp8,0,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,0,1,float16,float16,0,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,128,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,128,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,128,1,float16,float16,0,0.09337600072224934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,0,1,float16,float16,0,0.09170132875442505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,128,1,float16,fp8,0,0.09294933080673218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,128,1,fp8,fp8,0,0.08920533458391826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,0,1,float16,fp8,0,0.09109333157539368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,64,0,1,fp8,fp8,0,0.08475200335184734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,128,1,float16,float16,0,0.09546666344006856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,0,1,float16,float16,0,0.09481599926948547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,128,1,float16,fp8,0,0.09327466289202373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,128,1,fp8,fp8,0,0.0965226689974467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,0,1,float16,fp8,0,0.09113599856694539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,64,0,1,fp8,fp8,0,0.09241066376368205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,128,1,float16,float16,0,0.05611733098824819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,0,1,float16,float16,0,0.05392000079154968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,128,1,float16,fp8,0,0.055173332492510475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,128,1,fp8,fp8,0,0.05620799958705902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,0,1,float16,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,64,0,1,fp8,fp8,0,0.05400000015894572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,128,1,float16,float16,0,0.05381333331267039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,0,1,float16,float16,0,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,128,1,float16,fp8,0,0.0521919975678126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,128,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,0,1,float16,fp8,0,0.05231999854246775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,64,0,1,fp8,fp8,0,0.04886933167775472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,128,1,float16,float16,0,0.054144000013669334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,0,1,float16,float16,0,0.0532533327738444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,128,1,float16,fp8,0,0.051925331354141235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,128,1,fp8,fp8,0,0.053786665201187134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,128,1,float16,float16,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,0,1,float16,float16,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,128,1,float16,fp8,0,0.036159999668598175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,64,0,1,fp8,fp8,0,0.05261866748332977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,128,1,fp8,fp8,0,0.03579733272393545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,0,1,float16,fp8,0,0.035829332967599235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,64,0,1,fp8,fp8,0,0.034688000877698265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,128,1,float16,float16,0,0.03536533315976461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,128,1,float16,fp8,0,0.03385599950949351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,0,1,float16,float16,0,0.034416000048319496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,128,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,0,1,float16,fp8,0,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,64,0,1,fp8,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,0,1,float16,float16,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,128,1,float16,float16,0,0.03499733408292135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,128,1,float16,fp8,0,0.03435733417669932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,128,1,fp8,fp8,0,0.033759998778502144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,0,1,float16,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,64,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,128,1,float16,float16,0,0.02369600037733714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,0,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,128,1,float16,fp8,0,0.0239680012067159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,128,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,64,0,1,fp8,fp8,0,0.02554133286078771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,128,1,float16,float16,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,0,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,128,1,float16,fp8,0,0.024442667762438457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,128,1,fp8,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,64,0,1,fp8,fp8,0,0.022890667120615642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,128,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,128,1,float16,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,128,1,fp8,fp8,0,0.023978665471076965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,128,1,float16,float16,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,128,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,128,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,64,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,128,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,0,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,128,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,64,0,1,fp8,fp8,0,0.01952533299724261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,128,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,128,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,64,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,128,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,128,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,128,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,128,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,128,1,float16,fp8,0,0.018672000616788864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,128,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,128,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,128,1,float16,float16,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,0,1,float16,float16,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,0,1,float16,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,64,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,128,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,64,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,128,1,float16,float16,0,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,128,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,128,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,64,0,1,fp8,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,128,1,float16,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,128,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,64,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,128,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,128,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,128,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,64,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,128,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,128,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,64,0,1,fp8,fp8,0,0.017621333400408428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,128,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,0,1,float16,float16,0,0.017664000391960144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,0,1,float16,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,64,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,128,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,128,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,128,1,float16,fp8,0,0.018378666291634243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,128,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,128,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,0,1,float16,float16,0,0.017893332988023758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,128,1,float16,float16,0,0.054197331269582115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,0,1,float16,float16,0,0.05379733443260193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,128,1,float16,fp8,0,0.05434666574001312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,128,1,fp8,fp8,0,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,0,1,float16,fp8,0,0.05363733569780985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,64,0,1,fp8,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,128,1,float16,float16,0,0.053914666175842285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,0,1,float16,float16,0,0.053802669048309326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,128,1,float16,fp8,0,0.05388799806435903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,128,1,fp8,fp8,0,0.05324266850948334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,64,0,1,fp8,fp8,0,0.05221333106358846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,128,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,0,1,float16,float16,0,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,128,1,float16,fp8,0,0.0360000009338061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,128,1,fp8,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,0,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,64,0,1,fp8,fp8,0,0.03557866563399633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,128,1,float16,float16,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,0,1,float16,float16,0,0.035546667873859406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,128,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,128,1,fp8,fp8,0,0.033386667569478355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,0,1,float16,fp8,0,0.03583466758330663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,64,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,128,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,0,1,float16,float16,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,128,1,float16,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,128,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,128,1,float16,float16,0,0.02515733242034912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,64,0,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,128,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,128,1,fp8,fp8,0,0.024122667809327442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,0,1,float16,fp8,0,0.02426133304834366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,64,0,1,fp8,fp8,0,0.023455999791622162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,128,1,float16,float16,0,0.023818666736284893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,0,1,float16,float16,0,0.024847999215126038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,128,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,128,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,0,1,float16,fp8,0,0.025034666061401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,64,0,1,fp8,fp8,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,128,1,float16,float16,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,0,1,float16,float16,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,128,1,float16,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,128,1,fp8,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,0,1,float16,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,64,0,1,fp8,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,128,1,float16,float16,0,0.017759999881188076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,128,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,128,1,fp8,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,64,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,128,1,float16,float16,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,128,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,128,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,64,0,1,fp8,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,0,1,float16,float16,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,128,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,128,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,64,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,128,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,128,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,128,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,64,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,128,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,0,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,64,0,1,fp8,fp8,0,0.01579733317097028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,128,1,float16,float16,0,0.015770666301250458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,128,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,128,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,0,1,float16,float16,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,128,1,float16,fp8,0,0.015781333049138386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,0,1,float16,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,128,1,float16,float16,0,0.016176000237464905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,128,1,fp8,fp8,0,0.01647466669480006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,128,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,128,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,128,1,fp8,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,64,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,128,1,float16,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,64,0,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,128,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,64,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,128,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,64,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,128,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,128,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,64,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,128,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,128,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,128,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,64,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,128,1,float16,float16,0,0.043866669138272606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,0,1,float16,float16,0,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,128,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,0,1,float16,fp8,0,0.042912001411120095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,128,1,fp8,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,64,0,1,fp8,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,0,1,float16,float16,0,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,128,1,float16,float16,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,128,1,float16,fp8,0,0.043605332573254905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,128,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,0,1,float16,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,64,0,1,fp8,fp8,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,128,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,0,1,float16,float16,0,0.027647999425729115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,128,1,float16,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,0,1,float16,fp8,0,0.028959999481836956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,64,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,128,1,float16,float16,0,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,0,1,float16,float16,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,128,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,128,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,64,0,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,128,1,float16,float16,0,0.027642667293548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,0,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,128,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,0,1,float16,fp8,0,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,128,1,fp8,fp8,0,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,128,1,float16,float16,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,64,0,1,fp8,fp8,0,0.028037334481875103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,0,1,float16,float16,0,0.01982933282852173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,128,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,128,1,fp8,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,64,0,1,fp8,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,128,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,0,1,float16,float16,0,0.020938667158285778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,128,1,float16,fp8,0,0.020074666788180668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,128,1,fp8,fp8,0,0.020949333906173706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,64,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,128,1,float16,float16,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,128,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,128,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,64,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,128,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,128,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,64,0,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,128,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,128,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,128,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,128,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,0,1,float16,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,64,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,128,1,float16,float16,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,128,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,0,1,float16,fp8,0,0.014752000570297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,64,0,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,128,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,0,1,float16,fp8,0,0.016490666816631954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,64,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,0,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,128,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,0,1,float16,fp8,0,0.016373333831628162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,128,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,128,1,float16,fp8,0,0.01752000053723653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,64,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,0,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,64,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,128,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,128,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,0,1,float16,fp8,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,64,0,1,fp8,fp8,0,0.015856000284353893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,128,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,128,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,64,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,128,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,64,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,64,0,1,fp8,fp8,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,0,1,float16,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,64,0,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,128,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,128,1,fp8,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,64,0,1,fp8,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,128,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,128,1,float16,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,128,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,0,1,float16,float16,0,0.03608533243338267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,128,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,128,1,fp8,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,0,1,fp8,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,64,0,1,float16,fp8,0,0.036415999134381614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,128,1,float16,float16,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,0,1,float16,float16,0,0.033813332517941795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,128,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,128,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,0,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,128,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,64,0,1,fp8,fp8,0,0.03417066733042399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,0,1,float16,float16,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,128,1,float16,fp8,0,0.02492266645034154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,128,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,64,0,1,fp8,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,128,1,float16,float16,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,0,1,float16,float16,0,0.024559999505678814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,128,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,0,1,float16,fp8,0,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,128,1,fp8,fp8,0,0.023962666591008503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,64,0,1,fp8,fp8,0,0.02475733309984207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,0,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,128,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,0,1,float16,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,128,1,float16,float16,0,0.018677332748969395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,128,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,128,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,0,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,64,0,1,fp8,fp8,0,0.01960533360640208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,128,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,128,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,128,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,64,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,128,1,float16,float16,0,0.018730666488409042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,128,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,128,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,64,0,1,fp8,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,128,1,float16,float16,0,0.015919999529918034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,128,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,128,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,128,1,float16,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,0,1,float16,float16,0,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,128,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,128,1,fp8,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,128,1,float16,float16,0,0.014655999839305878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,64,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,128,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,64,0,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,128,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,128,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,128,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,64,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,128,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,128,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,128,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,64,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,128,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,128,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,128,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,128,1,fp8,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,128,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,0,1,float16,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,128,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,64,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,128,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,128,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,128,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,64,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,64,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,128,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,0,1,fp8,fp8,0,0.014730667074521383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,64,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,128,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,128,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,0,1,float16,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,128,1,float16,float16,0,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,0,1,float16,float16,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,128,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,128,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,0,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,64,0,1,fp8,fp8,0,0.029813334345817566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,128,1,float16,float16,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,0,1,float16,float16,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,128,1,float16,fp8,0,0.03194666653871536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,128,1,fp8,fp8,0,0.031167998909950256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,0,1,float16,fp8,0,0.03154666721820831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,64,0,1,fp8,fp8,0,0.03124266614516576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,128,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,128,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,128,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,0,1,float16,fp8,0,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,64,0,1,fp8,fp8,0,0.02239466706911723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,128,1,float16,float16,0,0.022874665757020313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,0,1,float16,float16,0,0.023013333479563396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,128,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,128,1,float16,float16,0,0.024911999702453613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,64,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,128,1,fp8,fp8,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,128,1,float16,float16,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,0,1,fp8,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,64,0,1,float16,fp8,0,0.02387733260790507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,128,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,128,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,0,1,float16,fp8,0,0.01794133335351944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,128,1,float16,float16,0,0.01762666677435239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,128,1,fp8,fp8,0,0.018810667097568512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,64,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,128,1,float16,float16,0,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,128,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,128,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,64,0,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,128,1,float16,fp8,0,0.016522667060295742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,128,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,128,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,128,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,128,1,fp8,fp8,0,0.016650666793187458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,64,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,64,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,128,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,64,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,128,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,128,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,128,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,128,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,128,1,fp8,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,0,1,float16,float16,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,128,1,fp8,fp8,0,0.015487999965747198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,64,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,64,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,128,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,128,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,128,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,128,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,128,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,64,0,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,128,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,128,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,64,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,128,1,float16,float16,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,128,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,128,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,64,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,128,1,float16,float16,0,0.014794666320085526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,128,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,64,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,128,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,0,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,128,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,0,0.029520000020662945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,128,1,fp8,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,64,0,1,fp8,fp8,0,0.02826666583617528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,128,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,64,0,1,fp8,fp8,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,128,1,float16,float16,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,128,1,float16,fp8,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,64,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,0,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,128,1,fp8,fp8,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,0,0.0232640008131663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,64,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,0,0.022869333624839783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,128,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,64,0,1,fp8,fp8,0,0.020954666038354237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,128,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,128,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,128,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,64,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,128,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,64,0,1,fp8,fp8,0,0.018789333601792652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,128,1,float16,fp8,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,128,1,fp8,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,128,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,0,0.015813333292802174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,64,0,1,fp8,fp8,0,0.01479999969402949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,128,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,0,0.015605332950750986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,64,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,128,1,float16,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,128,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,128,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,64,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,128,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,0,0.01651200031240781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,64,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,128,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,128,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,64,0,1,fp8,fp8,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,0,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,128,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,128,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,0,0.017792000124851864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,128,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,0,0.014826666563749313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,64,0,1,fp8,fp8,0,0.014682666709025701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,0,0.01647466669480006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,128,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,64,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,128,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,64,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,0,0.01470400020480156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,0,0.014688000082969666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,64,0,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,float16,0,0.11874133348464966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,128,1,float16,fp8,0,0.11983467141787212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,float16,0,0.6275039911270142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,128,1,fp8,fp8,0,0.11809600392977397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,float16,0,0.07272533575693767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,0,1,float16,fp8,0,0.6287413438161215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,64,0,1,fp8,fp8,0,0.5534026622772217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,128,1,float16,fp8,0,0.07682666679223378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,float16,0,0.368341326713562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,128,1,fp8,fp8,0,0.07538666824499766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,0,1,float16,fp8,0,0.37002134323120117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,float16,0,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,64,0,1,fp8,fp8,0,0.32449599107106525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,128,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,128,1,fp8,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,float16,0,0.3645013173421224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,0,1,float16,fp8,0,0.36444799105326336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,float16,0,0.04772266745567322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,64,0,1,fp8,fp8,0,0.31613866488138836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,float16,0,0.23828266064325967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,128,1,float16,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,128,1,fp8,fp8,0,0.04789333542188009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,0,1,float16,fp8,0,0.23958400885264078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,64,0,1,fp8,fp8,0,0.20788800716400146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,128,1,float16,fp8,0,0.04619200030962626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,128,1,fp8,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,float16,0,0.23704000314076742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,float16,0,0.03782933453718821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,0,1,float16,fp8,0,0.23621867100397745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,64,0,1,fp8,fp8,0,0.2038080096244812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,128,1,float16,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,128,1,fp8,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,float16,0,0.16522133350372314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,0,1,float16,fp8,0,0.1650879979133606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,64,0,1,fp8,fp8,0,0.14451199769973755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,128,1,float16,fp8,0,0.03782399992148081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,128,1,fp8,fp8,0,0.035605333745479584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,float16,0,0.16475733121236166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,0,1,fp8,fp8,0,0.14519466956456503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,64,0,1,float16,fp8,0,0.16513066490491232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,float16,0,0.09303999940554301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,128,1,float16,fp8,0,0.09579199552536011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,128,1,fp8,fp8,0,0.09321600198745728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,float16,0,0.39108800888061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,float16,0,0.058176000912984215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,0,1,fp8,fp8,0,0.34513068199157715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,64,0,1,float16,fp8,0,0.3950186570485433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,128,1,float16,fp8,0,0.06001600126425425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,128,1,fp8,fp8,0,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,float16,0,0.23655466238657633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,float16,0,0.05470933516820272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,0,1,float16,fp8,0,0.23866132895151773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,64,0,1,fp8,fp8,0,0.2098133365313212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,128,1,float16,fp8,0,0.05750933289527893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,128,1,fp8,fp8,0,0.05433600147565206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,float16,0,0.23457600673039755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,float16,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,0,1,float16,fp8,0,0.23435733715693155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,64,0,1,fp8,fp8,0,0.20639999707539877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,float16,0,0.15650666753451029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,128,1,float16,fp8,0,0.041759997606277466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,128,1,fp8,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,0,1,float16,fp8,0,0.1551253298918406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,64,0,1,fp8,fp8,0,0.13689600427945456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,float16,0,0.041936000188191734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,128,1,float16,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,128,1,fp8,fp8,0,0.0391839991013209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,fp8,0,0.1567146678765615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,0,1,float16,float16,0,0.15546666582425436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,float16,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,64,0,1,fp8,fp8,0,0.1341333289941152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,float16,0,0.12738133470217386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,128,1,float16,fp8,0,0.03405333310365677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,128,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,0,1,float16,fp8,0,0.12778133153915405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,64,0,1,fp8,fp8,0,0.11173333724339803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,float16,0,0.033301333586374916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,float16,0,0.12771200140317282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,128,1,float16,fp8,0,0.03179733455181122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,128,1,fp8,fp8,0,0.03262399882078171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,0,1,float16,fp8,0,0.1283146639664968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,64,0,1,fp8,fp8,0,0.1116373340288798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,float16,0,0.07982933521270752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,128,1,float16,fp8,0,0.0831520011027654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,float16,0,0.2949333389600118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,128,1,fp8,fp8,0,0.07971733311812083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,0,1,float16,fp8,0,0.29471999406814575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,float16,0,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,float16,0,0.18545599778493246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,64,0,1,fp8,fp8,0,0.259168008963267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,128,1,float16,fp8,0,0.05431999762852987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,128,1,fp8,fp8,0,0.052202666799227394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,0,1,float16,fp8,0,0.18733867009480795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,64,0,1,fp8,fp8,0,0.16290133198102316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,float16,0,0.05019199848175049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,float16,0,0.18346667289733887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,128,1,float16,fp8,0,0.05212800204753876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,128,1,fp8,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,0,1,float16,fp8,0,0.183786670366923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,64,0,1,fp8,fp8,0,0.1612213353315989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,float16,0,0.11772800485293071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,128,1,float16,fp8,0,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,128,1,fp8,fp8,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,0,1,float16,fp8,0,0.11820266644159953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,64,0,1,fp8,fp8,0,0.10377599795659383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,float16,0,0.03771200031042099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,128,1,float16,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,float16,0,0.11801066994667053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,128,1,fp8,fp8,0,0.03568533311287562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,0,1,float16,fp8,0,0.11744532982508342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,64,0,1,fp8,fp8,0,0.10340266426404317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,float16,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,float16,0,0.10940266648928325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,128,1,float16,fp8,0,0.031258667508761086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,128,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,0,1,float16,fp8,0,0.1095306674639384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,float16,0,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,64,0,1,fp8,fp8,0,0.09518399834632874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,128,1,float16,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,float16,0,0.10982933640480042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,128,1,fp8,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,0,1,float16,fp8,0,0.10937066872914632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,64,0,1,fp8,fp8,0,0.09738133351008098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,float16,0,0.11758933464686076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,128,1,float16,fp8,0,0.12025599678357442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,128,1,fp8,fp8,0,0.1176479955514272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,float16,0,0.3668746550877889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,float16,0,0.07053333520889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,0,1,float16,fp8,0,0.37089065710703534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,64,0,1,fp8,fp8,0,0.32730666796366376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,float16,0,0.2157706618309021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,128,1,float16,fp8,0,0.07283733288447063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,128,1,fp8,fp8,0,0.07283733288447063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,float16,0,0.06614933411280315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,0,1,fp8,fp8,0,0.195360004901886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,64,0,1,float16,fp8,0,0.2185866634051005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,128,1,float16,fp8,0,0.0684799998998642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,128,1,fp8,fp8,0,0.06583466629187266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,float16,0,0.20997865994771323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,float16,0,0.043807998299598694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,0,1,fp8,fp8,0,0.1856266657511393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,64,0,1,float16,fp8,0,0.213919997215271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,float16,0,0.13823466499646506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,128,1,float16,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,128,1,fp8,fp8,0,0.04390933116277059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,0,1,float16,fp8,0,0.13921067118644714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,float16,0,0.042394667863845825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,64,0,1,fp8,fp8,0,0.12212799986203511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,float16,0,0.1365493337313334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,128,1,float16,fp8,0,0.04418133199214935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,128,1,fp8,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,float16,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,0,1,float16,fp8,0,0.13613333304723105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,64,0,1,fp8,fp8,0,0.12158933281898499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,float16,0,0.0932426651318868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,128,1,float16,fp8,0,0.030229332546393078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,128,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,0,1,fp8,fp8,0,0.08271466692288716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,64,0,1,float16,fp8,0,0.093231995900472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,128,1,float16,fp8,0,0.02922133356332779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,128,1,fp8,fp8,0,0.02805333336194356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,float16,0,0.09309867024421692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,float16,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,0,1,float16,fp8,0,0.09306666254997253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,64,0,1,fp8,fp8,0,0.08081066608428955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,128,1,float16,fp8,0,0.02829866607983907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,128,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,float16,0,0.09108266234397888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,0,1,float16,fp8,0,0.08962133526802063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,float16,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,64,0,1,fp8,fp8,0,0.08089066545168559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,128,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,128,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,float16,0,0.08939733107884724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,0,1,float16,fp8,0,0.09118400017420451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,64,0,1,fp8,fp8,0,0.0799786647160848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,float16,0,0.09289066990216573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,float16,0,0.23543999592463175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,128,1,float16,fp8,0,0.09576533238093059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,128,1,fp8,fp8,0,0.09331199526786804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,0,1,float16,fp8,0,0.2392586668332418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,64,0,1,fp8,fp8,0,0.2136746644973755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,float16,0,0.056986664732297264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,float16,0,0.14428266882896423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,128,1,float16,fp8,0,0.05796800057093302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,128,1,fp8,fp8,0,0.05693866809209188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,0,1,float16,fp8,0,0.1464906632900238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,64,0,1,fp8,fp8,0,0.13011200229326883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,float16,0,0.05301333467165629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,float16,0,0.1406826674938202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,128,1,float16,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,128,1,fp8,fp8,0,0.054005334774653115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,0,1,float16,fp8,0,0.14236266414324442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,64,0,1,fp8,fp8,0,0.1237333317597707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,float16,0,0.03772799919048945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,float16,0,0.09334933757781982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,128,1,float16,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,128,1,fp8,fp8,0,0.03755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,0,1,float16,fp8,0,0.09514666597048442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,64,0,1,fp8,fp8,0,0.08486933509508769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,float16,0,0.0918346643447876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,128,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,128,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,0,1,float16,fp8,0,0.09317866961161296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,64,0,1,fp8,fp8,0,0.08226666847864787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,float16,0,0.07329600056012471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,128,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,128,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,0,1,float16,fp8,0,0.07457600037256877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,64,0,1,fp8,fp8,0,0.06638933221499126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,float16,0,0.07292800148328145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,128,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,128,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,0,1,float16,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,64,0,1,fp8,fp8,0,0.0662720004717509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,float16,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,float16,0,0.07090666890144348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,128,1,float16,fp8,0,0.026101333399613697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,0,1,float16,fp8,0,0.07268266876538594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,64,0,1,fp8,fp8,0,0.06321600079536438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,float16,0,0.07257066667079926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,128,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,128,1,fp8,fp8,0,0.02478400121132533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,0,1,float16,fp8,0,0.07292266686757405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,64,0,1,fp8,fp8,0,0.06431999802589417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,float16,0,0.12061867117881775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,128,1,float16,fp8,0,0.1237440009911855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,float16,0,0.24000000953674316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,128,1,fp8,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,0,1,float16,fp8,0,0.23875200748443604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,64,0,1,fp8,fp8,0,0.21843733390172324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,float16,0,0.07029866675535838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,float16,0,0.1386666695276896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,128,1,float16,fp8,0,0.07291733225186665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,128,1,fp8,fp8,0,0.07455466687679291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,0,1,float16,fp8,0,0.1404213309288025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,float16,0,0.0672266681989034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,64,0,1,fp8,fp8,0,0.12966400384902954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,128,1,float16,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,float16,0,0.1341866652170817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,128,1,fp8,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,0,1,fp8,fp8,0,0.12230933705965678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,64,0,1,float16,fp8,0,0.13635200262069702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,float16,0,0.08916266759236653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,128,1,fp8,fp8,0,0.04215999941031138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,float16,0,0.04146133363246918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,0,1,fp8,fp8,0,0.07922133306662242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,64,0,1,float16,fp8,0,0.09058133761088054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,128,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,128,1,fp8,fp8,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,float16,0,0.08716266353925069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,0,1,float16,fp8,0,0.0886346697807312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,64,0,1,fp8,fp8,0,0.07870933413505554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,128,1,float16,fp8,0,0.02980799973011017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,float16,0,0.05845333139101664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,128,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,0,1,float16,fp8,0,0.06036800146102905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,64,0,1,fp8,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,float16,0,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,128,1,float16,fp8,0,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,128,1,fp8,fp8,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,0,1,float16,fp8,0,0.05818133552869161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,float16,0,0.023951999843120575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,64,0,1,fp8,fp8,0,0.05231999854246775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,float16,0,0.05412266651789347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,128,1,float16,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,0,1,float16,fp8,0,0.055904000997543335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,float16,0,0.023786666492621105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,64,0,1,fp8,fp8,0,0.049626668294270836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,float16,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,128,1,float16,fp8,0,0.02491733431816101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,128,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,0,1,float16,fp8,0,0.053685332338015236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,64,0,1,fp8,fp8,0,0.04828266799449921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,float16,0,0.05314133564631144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,128,1,fp8,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,0,1,float16,fp8,0,0.052149335543314614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,64,0,1,fp8,fp8,0,0.048112000028292336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,float16,0,0.05420800050099691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,128,1,float16,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,128,1,fp8,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,0,1,float16,fp8,0,0.05262400209903717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,64,0,1,fp8,fp8,0,0.046336000164349876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,float16,0,0.0993226667245229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,float16,0,0.16360533237457275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,128,1,float16,fp8,0,0.09944533308347066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,128,1,fp8,fp8,0,0.09961600104967754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,0,1,float16,fp8,0,0.16579733292261759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,float16,0,0.05798399945100149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,float16,0,0.09858666857083638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,64,0,1,fp8,fp8,0,0.1525813341140747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,128,1,float16,fp8,0,0.059402664502461754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,128,1,fp8,fp8,0,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,0,1,fp8,fp8,0,0.09103467067082723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,64,0,1,float16,fp8,0,0.09980266292889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,float16,0,0.05400000015894572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,float16,0,0.09307199716567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,128,1,float16,fp8,0,0.05795200169086456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,128,1,fp8,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,0,1,float16,fp8,0,0.09643733501434326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,64,0,1,fp8,fp8,0,0.08463467160860698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,float16,0,0.03975466638803482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,float16,0,0.06451733410358429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,128,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,128,1,fp8,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,0,1,float16,fp8,0,0.06448533137639363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,64,0,1,fp8,fp8,0,0.05795733133951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,float16,0,0.037477334340413414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,float16,0,0.062218666076660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,128,1,float16,fp8,0,0.03923200070858002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,128,1,fp8,fp8,0,0.03605866680542628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,0,1,float16,fp8,0,0.06427200138568878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,64,0,1,fp8,fp8,0,0.056074668963750206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,float16,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,float16,0,0.04966933528582255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,128,1,float16,fp8,0,0.027610667049884796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,128,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,0,1,float16,fp8,0,0.04808000226815542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,64,0,1,fp8,fp8,0,0.044122666120529175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,float16,0,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,128,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,float16,0,0.048122664292653404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,128,1,fp8,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,0,1,float16,fp8,0,0.04804799954096476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,64,0,1,fp8,fp8,0,0.0436106671889623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,float16,0,0.043968002001444496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,128,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,128,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,0,1,fp8,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,64,0,1,float16,fp8,0,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,128,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,128,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,float16,0,0.045925334095954895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,0,1,float16,fp8,0,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,64,0,1,fp8,fp8,0,0.03990933299064636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,float16,0,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,128,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,128,1,fp8,fp8,0,0.021984001000722248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,0,1,float16,fp8,0,0.04362666606903076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,float16,0,0.021583999196688335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,64,0,1,fp8,fp8,0,0.03967999915281931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,float16,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,128,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,128,1,fp8,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,0,1,float16,fp8,0,0.044218664367993675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,64,0,1,fp8,fp8,0,0.037989333271980286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,float16,0,0.12109333276748657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,float16,0,0.17110933860143027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,128,1,float16,fp8,0,0.12131733695665996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,128,1,fp8,fp8,0,0.11872000495592754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,0,1,float16,fp8,0,0.17250667015711466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,64,0,1,fp8,fp8,0,0.15999466180801392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,float16,0,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,float16,0,0.09939199686050415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,128,1,float16,fp8,0,0.07498666644096375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,128,1,fp8,fp8,0,0.07178666690985362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,0,1,float16,fp8,0,0.10021866361300151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,64,0,1,fp8,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,float16,0,0.06621866424878438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,float16,0,0.09325333436330159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,128,1,float16,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,128,1,fp8,fp8,0,0.06593066453933716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,0,1,float16,fp8,0,0.09703466296195984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,float16,0,0.04146666576464971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,64,0,1,fp8,fp8,0,0.08793600400288899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,float16,0,0.06241600215435028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,128,1,float16,fp8,0,0.043920000394185386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,128,1,fp8,fp8,0,0.04189866781234741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,0,1,fp8,fp8,0,0.058090666929880776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,float16,0,0.03968533376852671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,64,0,1,float16,fp8,0,0.06252266466617584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,float16,0,0.06039999922116598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,128,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,128,1,fp8,fp8,0,0.03979199876387914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,0,1,float16,fp8,0,0.05985599756240845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,64,0,1,fp8,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,float16,0,0.028549333413441975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,float16,0,0.04174399872620901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,128,1,float16,fp8,0,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,128,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,0,1,float16,fp8,0,0.04187199970086416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,64,0,1,fp8,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,float16,0,0.02828799933195114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,float16,0,0.040976000328858696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,128,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,128,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,0,1,float16,fp8,0,0.041482667128245033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,64,0,1,fp8,fp8,0,0.03775999943415324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,float16,0,0.03570666660865148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,128,1,float16,fp8,0,0.024314666787783306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,0,1,float16,fp8,0,0.03788800040880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,float16,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,64,0,1,fp8,fp8,0,0.03334933271010717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,128,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,128,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,0,1,float16,fp8,0,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,64,0,1,fp8,fp8,0,0.03374933451414108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,float16,0,0.033786666889985405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,128,1,float16,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,128,1,fp8,fp8,0,0.020821332931518555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,0,1,float16,fp8,0,0.03373866776625315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,64,0,1,fp8,fp8,0,0.03204799940188726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,float16,0,0.034688000877698265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,128,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,128,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,64,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,float16,0,0.020773333807786305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,float16,0,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,128,1,float16,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,128,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,0,1,float16,fp8,0,0.03369066615899404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,64,0,1,fp8,fp8,0,0.02993600070476532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,float16,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,128,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,128,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,0,1,float16,fp8,0,0.033786666889985405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,64,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,float16,0,0.09899200002352397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,128,1,float16,fp8,0,0.10290132959683736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,float16,0,0.12468266487121582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,128,1,fp8,fp8,0,0.097653329372406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,0,1,float16,fp8,0,0.1260640025138855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,64,0,1,fp8,fp8,0,0.11736533045768738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,float16,0,0.05834133426348368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,float16,0,0.07460266848405202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,128,1,float16,fp8,0,0.059749335050582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,128,1,fp8,fp8,0,0.05845866600672404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,0,1,float16,fp8,0,0.075162669022878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,64,0,1,fp8,fp8,0,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,float16,0,0.05630399783452352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,float16,0,0.07196266452471416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,128,1,float16,fp8,0,0.057999998331069946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,128,1,fp8,fp8,0,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,0,1,float16,fp8,0,0.07294933497905731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,64,0,1,fp8,fp8,0,0.06790933509667714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,128,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,128,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,0,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,64,0,1,fp8,fp8,0,0.04566933214664459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,float16,0,0.03755733370780945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,float16,0,0.04822933177153269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,128,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,128,1,fp8,fp8,0,0.036490666369597115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,64,0,1,fp8,fp8,0,0.043738668163617454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,float16,0,0.027045334378878277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,float16,0,0.036090667049090065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,128,1,float16,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,0,1,float16,fp8,0,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,128,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,64,0,1,fp8,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,128,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,float16,0,0.03572800010442734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,128,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,0,1,float16,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,64,0,1,fp8,fp8,0,0.03241066634654999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,float16,0,0.03176533430814743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,128,1,float16,fp8,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,128,1,fp8,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,64,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,float16,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,128,1,float16,fp8,0,0.02346133440732956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,128,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,0,1,float16,fp8,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,64,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,128,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,128,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,0,1,float16,fp8,0,0.030421334008375805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,64,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,128,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,128,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,0,1,float16,fp8,0,0.02938666691382726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,64,0,1,fp8,fp8,0,0.027114666998386383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,float16,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,128,1,float16,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,0,1,float16,fp8,0,0.029818666477998097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,64,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,128,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,float16,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,128,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,0,1,float16,fp8,0,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,64,0,1,fp8,fp8,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,float16,0,0.10108266274134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,float16,0,0.11786666512489319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,128,1,float16,fp8,0,0.10077866911888123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,128,1,fp8,fp8,0,0.09934399525324504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,0,1,float16,fp8,0,0.11727466185887654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,64,0,1,fp8,fp8,0,0.11155733466148376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,float16,0,0.059802666306495667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,float16,0,0.07021333277225494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,128,1,float16,fp8,0,0.05964800218741099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,128,1,fp8,fp8,0,0.06019733349482218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,0,1,float16,fp8,0,0.06943466762701671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,64,0,1,fp8,fp8,0,0.06795733173688252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,float16,0,0.058042665322621666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,float16,0,0.06654933094978333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,128,1,float16,fp8,0,0.0576853354771932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,128,1,fp8,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,0,1,float16,fp8,0,0.06714133421579997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,64,0,1,fp8,fp8,0,0.062074666221936546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,float16,0,0.03631466627120972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,float16,0,0.04494933287302653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,128,1,float16,fp8,0,0.03669866671164831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,128,1,fp8,fp8,0,0.037647999823093414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,0,1,float16,fp8,0,0.04407466451327006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,64,0,1,fp8,fp8,0,0.041536000867684685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,float16,0,0.03736000011364619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,128,1,float16,fp8,0,0.03554133325815201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,float16,0,0.04377066592375437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,128,1,fp8,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,0,1,float16,fp8,0,0.04366933306058248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,64,0,1,fp8,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,128,1,float16,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,float16,0,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,128,1,fp8,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,64,0,1,fp8,fp8,0,0.029509333272775013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,float16,0,0.02587199956178665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,128,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,128,1,fp8,fp8,0,0.025807999074459076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,0,1,float16,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,float16,0,0.022730665902296703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,64,0,1,fp8,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,float16,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,128,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,128,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,0,1,float16,fp8,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,64,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,float16,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,128,1,float16,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,128,1,fp8,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,0,1,float16,fp8,0,0.026677332818508148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,128,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,128,1,fp8,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,0,1,float16,fp8,0,0.024618667860825855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,64,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,128,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,128,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,0,1,float16,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,64,0,1,fp8,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,float16,0,0.023711999257405598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,128,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,128,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,64,0,1,fp8,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,float16,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,128,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,float16,0,0.023872000475724537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,0,1,float16,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,64,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,float16,0,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,128,1,float16,fp8,0,0.019679999599854153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,float16,0,0.02436800052722295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,128,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,0,1,float16,fp8,0,0.02447466552257538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,64,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,float16,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,128,1,float16,fp8,0,0.02037866661945979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,128,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,0,1,float16,fp8,0,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,64,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,0,1,float16,float16,0,0.09970133503278096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,128,1,float16,fp8,0,0.09911466638247173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,128,1,fp8,fp8,0,0.09716799855232239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,128,1,float16,float16,0,0.10140267014503479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,128,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,0,1,float16,float16,0,0.05845333139101664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,0,1,float16,fp8,0,0.09937066833178203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,64,0,1,fp8,fp8,0,0.09619200229644775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,128,1,float16,fp8,0,0.057631999254226685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,0,1,float16,fp8,0,0.05754133562246958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,128,1,fp8,fp8,0,0.05796800057093302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,64,0,1,fp8,fp8,0,0.05792533357938131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,128,1,float16,float16,0,0.05630399783452352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,0,1,float16,float16,0,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,128,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,128,1,fp8,fp8,0,0.05460800230503082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,0,1,float16,fp8,0,0.05619733532269796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,64,0,1,fp8,fp8,0,0.05260799825191498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,128,1,float16,float16,0,0.0379573330283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,128,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,128,1,fp8,fp8,0,0.03694933404525121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,0,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,64,0,1,fp8,fp8,0,0.03517866631348928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,128,1,float16,float16,0,0.03586133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,0,1,float16,float16,0,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,128,1,float16,fp8,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,0,1,float16,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,128,1,fp8,fp8,0,0.0348693331082662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,64,0,1,fp8,fp8,0,0.035205334424972534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,128,1,float16,float16,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,128,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,0,1,float16,fp8,0,0.026890667776266735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,0,1,fp8,fp8,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,64,128,1,fp8,fp8,0,0.02573866645495097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,128,1,float16,float16,0,0.02553066611289978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,128,1,float16,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,0,1,float16,fp8,0,0.025727999707063038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,64,0,1,fp8,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,128,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,128,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,64,0,1,fp8,fp8,0,0.021370666722456615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,128,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,128,1,float16,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,0,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,128,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,0,1,float16,fp8,0,0.02306666721900304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,64,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,128,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,0,1,float16,float16,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,128,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,128,1,fp8,fp8,0,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,64,0,1,fp8,fp8,0,0.020634666085243225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,0,1,float16,float16,0,0.020853333175182343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,128,1,float16,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,128,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,64,0,1,fp8,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,128,1,float16,float16,0,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,128,1,float16,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,128,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,64,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,128,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,128,1,float16,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,128,1,fp8,fp8,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,0,1,float16,fp8,0,0.0205226664741834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,128,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,64,0,1,fp8,fp8,0,0.019637333850065868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,128,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,128,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,64,0,1,fp8,fp8,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,128,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,0,1,float16,float16,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,128,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,128,1,fp8,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,0,1,float16,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,64,0,1,fp8,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,128,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,0,1,float16,float16,0,0.0198186660806338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,0,1,float16,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,128,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,128,1,float16,fp8,0,0.02035733312368393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,64,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,128,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,128,1,float16,fp8,0,0.02032533288002014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,128,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,64,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,0,1,float16,float16,0,0.05197333296140035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,128,1,float16,float16,0,0.05421866476535797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,128,1,float16,fp8,0,0.05232533315817515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,128,1,fp8,fp8,0,0.05202133456865946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,128,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,0,1,float16,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,64,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,128,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,128,1,float16,fp8,0,0.03475733349720637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,64,0,1,fp8,fp8,0,0.033546666304270424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,128,1,float16,float16,0,0.03480533262093862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,0,1,float16,float16,0,0.03233066697915395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,128,1,float16,fp8,0,0.03344533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,128,1,fp8,fp8,0,0.03329599897066752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,128,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,0,1,float16,fp8,0,0.03316266586383184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,64,0,1,fp8,fp8,0,0.03183466692765554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,128,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,128,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,64,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,128,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,0,1,float16,float16,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,128,1,float16,fp8,0,0.02346666653951009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,128,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,128,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,0,1,float16,fp8,0,0.02386666586001714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,64,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,128,1,float16,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,128,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,64,0,1,fp8,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,128,1,float16,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,128,1,fp8,fp8,0,0.02060266708334287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,0,1,float16,fp8,0,0.020282667130231857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,64,0,1,fp8,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,128,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,128,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,0,1,float16,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,64,0,1,fp8,fp8,0,0.01829333355029424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,0,1,float16,float16,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,128,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,0,1,float16,float16,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,128,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,128,1,fp8,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,64,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,128,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,128,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,128,1,fp8,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,0,1,float16,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,64,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,0,1,float16,float16,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,128,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,128,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,64,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,128,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,128,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,64,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,128,1,float16,float16,0,0.018581333259741466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,128,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,128,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,0,1,float16,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,64,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,128,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,128,1,float16,fp8,0,0.017680000513792038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,0,1,float16,float16,0,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,128,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,64,0,1,fp8,fp8,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,128,1,float16,float16,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,128,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,0,1,float16,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,64,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,128,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,128,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,0,1,float16,float16,0,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,128,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,0,1,float16,fp8,0,0.01886933296918869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,64,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,128,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,0,1,float16,float16,0,0.035599999129772186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,128,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,128,1,float16,float16,0,0.034832000732421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,0,1,float16,fp8,0,0.03505066782236099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,64,0,1,fp8,fp8,0,0.033600000043710075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,128,1,float16,float16,0,0.02403733382622401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,0,1,float16,float16,0,0.024933333198229473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,128,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,128,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,0,1,float16,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,64,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,0,1,float16,float16,0,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,128,1,float16,float16,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,128,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,128,1,fp8,fp8,0,0.023658665517965954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,64,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,128,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,128,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,128,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,64,0,1,fp8,fp8,0,0.01841066653529803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,128,1,float16,float16,0,0.018746666610240936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,128,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,128,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,64,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,128,1,float16,float16,0,0.01565333331624667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,128,1,fp8,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,128,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,0,1,float16,fp8,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,64,0,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,128,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,128,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,64,0,1,fp8,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,128,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,128,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,64,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,128,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,0,1,float16,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,64,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,128,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,128,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,64,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,128,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,128,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,128,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,64,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,128,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,128,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,128,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,64,0,1,fp8,fp8,0,0.016303999970356624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,128,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,128,1,float16,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,0,1,float16,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,64,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,128,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,128,1,fp8,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,0,1,float16,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,128,1,float16,fp8,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,128,1,fp8,fp8,0,0.016165333489576977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,64,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,128,1,float16,float16,0,0.02937600016593933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,0,1,float16,float16,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,128,1,float16,fp8,0,0.030053332448005676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,128,1,fp8,fp8,0,0.027850667635599773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,128,1,float16,float16,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,64,0,1,fp8,fp8,0,0.02903999884923299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,128,1,float16,fp8,0,0.02082666630546252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,128,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,0,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,64,0,1,fp8,fp8,0,0.020901332298914593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,128,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,128,1,float16,fp8,0,0.020207999895016353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,0,1,float16,float16,0,0.0198186660806338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,128,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,64,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,128,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,128,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,64,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,128,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,128,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,128,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,0,1,float16,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,64,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,128,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,0,1,float16,float16,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,128,1,float16,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,0,1,float16,fp8,0,0.016730666160583496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,64,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,128,1,float16,float16,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,128,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,64,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,128,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,128,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,64,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,128,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,64,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,128,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,128,1,fp8,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,128,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,128,1,float16,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,128,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,0,1,float16,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,64,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,128,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,128,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,64,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,128,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,0,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,128,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,0,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,64,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,128,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,128,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,128,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,128,1,float16,float16,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,128,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,0,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,128,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,128,1,float16,float16,0,0.014741333822409311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,0,1,float16,float16,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,128,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,128,1,float16,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,0,1,float16,fp8,0,0.014720000326633453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,128,1,float16,fp8,0,0.025594666600227356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,64,128,1,fp8,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,128,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,128,1,float16,fp8,0,0.01974933346112569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,128,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,0,1,float16,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,64,0,1,fp8,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,128,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,128,1,float16,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,128,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,64,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,128,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,128,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,64,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,128,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,128,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,128,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,64,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,128,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,128,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,64,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,128,1,float16,fp8,0,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,64,0,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,128,1,float16,float16,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,128,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,0,1,float16,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,0,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,0,1,float16,fp8,0,0.016399999459584553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,128,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,128,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,64,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,128,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,128,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,64,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,128,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,128,1,float16,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,0,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,64,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,0,1,float16,float16,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,128,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,128,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,64,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,128,1,float16,float16,0,0.015743999431530636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,128,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,64,0,1,float16,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,0,1,float16,float16,0,0.013839999834696451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,128,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,128,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,64,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,128,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,128,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,0,1,float16,fp8,0,0.01469333345691363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,64,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,0,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,128,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,128,1,float16,float16,0,0.023957334458827972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,128,1,fp8,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,64,0,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,0,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,128,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,128,1,float16,fp8,0,0.01766933376590411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,128,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,64,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,128,1,float16,float16,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,128,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,128,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,64,0,1,fp8,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,128,1,float16,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,128,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,128,1,float16,float16,0,0.016202667107184727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,128,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,128,1,fp8,fp8,0,0.016016000260909397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,64,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,128,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,128,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,0,1,float16,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,64,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,128,1,float16,float16,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,128,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,128,1,fp8,fp8,0,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,64,0,1,fp8,fp8,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,128,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,128,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,128,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,64,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,128,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,128,1,float16,float16,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,0,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,128,1,float16,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,128,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,64,0,1,fp8,fp8,0,0.014853333433469137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,128,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,128,1,float16,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,128,1,fp8,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,128,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,128,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,128,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,0,1,float16,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,128,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,128,1,fp8,fp8,0,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,64,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,128,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,128,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,128,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,64,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,128,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,128,1,float16,fp8,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,128,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,0,1,float16,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,128,1,float16,float16,0,0.014677333335081736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,0,1,float16,float16,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,128,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,64,0,1,fp8,fp8,0,0.013978666315476099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,128,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,128,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,64,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,0,0.02314666658639908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,0,0.022042666872342426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,128,1,fp8,fp8,0,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,64,0,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,128,1,float16,float16,0,0.01770666614174843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,128,1,float16,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,128,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,64,0,1,fp8,fp8,0,0.017887999614079792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,0,0.018826667219400406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,128,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,128,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,64,0,1,fp8,fp8,0,0.0179626668492953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,128,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,128,1,fp8,fp8,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,64,0,1,fp8,fp8,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,128,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,64,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,128,1,float16,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,128,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,0,0.016458666572968166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,64,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,0,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,128,1,fp8,fp8,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,64,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,128,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,64,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,128,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,128,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,128,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,128,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,64,0,1,fp8,fp8,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,128,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,128,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,128,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,128,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,64,0,1,fp8,fp8,0,0.01434133326013883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,128,1,float16,float16,0,0.014736000448465347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,128,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,128,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,128,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,128,1,float16,float16,0,0.014325333138306936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,64,0,1,fp8,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,128,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,0,0.01463466634353002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,128,1,fp8,fp8,0,0.014698666830857595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,64,0,1,fp8,fp8,0,0.014789332946141561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,float16,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,128,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,float16,0,0.23388266563415527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,128,1,fp8,fp8,0,0.044112001856168113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,0,1,float16,fp8,0,0.23478933175404867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,64,0,1,fp8,fp8,0,0.20383999745051065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,float16,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,128,1,float16,fp8,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,float16,0,0.1572213371594747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,128,1,fp8,fp8,0,0.03159466634194056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,0,1,float16,fp8,0,0.15869866808255514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,64,0,1,fp8,fp8,0,0.13793067137400308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,float16,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,128,1,float16,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,float16,0,0.15451199809710184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,128,1,fp8,fp8,0,0.02975466599067052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,float16,0,0.040037333965301514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,0,1,float16,fp8,0,0.15481066703796387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,64,0,1,fp8,fp8,0,0.1360106666882833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,128,1,float16,fp8,0,0.04179200033346812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,128,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,float16,0,0.15449066956837973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,float16,0,0.02972800036271413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,0,1,float16,fp8,0,0.15453867117563883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,64,0,1,fp8,fp8,0,0.136272003253301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,128,1,float16,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,128,1,fp8,fp8,0,0.028186666468779247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,float16,0,0.12401066223780315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,float16,0,0.025770666698614757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,0,1,float16,fp8,0,0.12171199917793274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,64,0,1,fp8,fp8,0,0.10737599929173787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,128,1,fp8,fp8,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,float16,0,0.12147200107574463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,0,1,float16,fp8,0,0.12164800365765889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,64,0,1,fp8,fp8,0,0.10551466544469197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,float16,0,0.037402667105197906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,128,1,float16,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,128,1,fp8,fp8,0,0.035936000446478523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,float16,0,0.11763200163841248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,float16,0,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,0,1,float16,fp8,0,0.11788800358772278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,64,0,1,fp8,fp8,0,0.10360532999038696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,128,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,float16,0,0.10468799869219463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,128,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,float16,0,0.025648000339667004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,0,1,float16,fp8,0,0.10528533657391866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,64,0,1,fp8,fp8,0,0.09170666337013245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,128,1,float16,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,float16,0,0.10345600048700969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,0,1,float16,fp8,0,0.10319999853769939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,128,1,fp8,fp8,0,0.02585600068171819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,64,0,1,fp8,fp8,0,0.09074667096138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,float16,0,0.042463997999827065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,float16,0,0.13607466220855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,128,1,float16,fp8,0,0.04446400205294291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,128,1,fp8,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,0,1,float16,fp8,0,0.13946666320165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,64,0,1,fp8,fp8,0,0.12198399504025777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,float16,0,0.09313600262006123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,128,1,float16,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,128,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,0,1,float16,fp8,0,0.09335999687512715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,64,0,1,fp8,fp8,0,0.08269333342711131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,float16,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,float16,0,0.08854933579762776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,128,1,float16,fp8,0,0.025605333348115284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,128,1,fp8,fp8,0,0.02532266577084859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,0,1,float16,fp8,0,0.0888159970442454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,64,0,1,fp8,fp8,0,0.07832000156243642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,float16,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,float16,0,0.08505066235860188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,128,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,128,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,0,1,fp8,fp8,0,0.07480533421039581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,64,0,1,float16,fp8,0,0.08727999528249104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,float16,0,0.03941866755485535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,float16,0,0.09366933504740398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,128,1,float16,fp8,0,0.03953066716591517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,128,1,fp8,fp8,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,0,1,float16,fp8,0,0.09697600205739339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,64,0,1,fp8,fp8,0,0.08529599507649739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,float16,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,float16,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,128,1,float16,fp8,0,0.02775466690460841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,128,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,0,1,float16,fp8,0,0.07449600100517273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,64,0,1,fp8,fp8,0,0.06462400158246358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,float16,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,float16,0,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,128,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,128,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,0,1,float16,fp8,0,0.07065600156784058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,64,0,1,fp8,fp8,0,0.062133332093556724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,float16,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,float16,0,0.0693280001481374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,128,1,float16,fp8,0,0.021967999637126923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,128,1,fp8,fp8,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,0,1,float16,fp8,0,0.0683786670366923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,64,0,1,fp8,fp8,0,0.06007466713587443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,float16,0,0.04224533339341482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,float16,0,0.08711999654769897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,128,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,128,1,fp8,fp8,0,0.042709335684776306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,0,1,float16,fp8,0,0.08942400415738423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,64,0,1,fp8,fp8,0,0.08087466657161713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,float16,0,0.029167999823888142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,128,1,float16,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,float16,0,0.058277333776156105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,128,1,fp8,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,0,1,fp8,fp8,0,0.05249066650867462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,64,0,1,float16,fp8,0,0.06036800146102905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,128,1,float16,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,float16,0,0.05236800014972687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,128,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,0,1,fp8,fp8,0,0.047824000318845115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,64,0,1,float16,fp8,0,0.0543039987484614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,float16,0,0.05208000044027964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,128,1,float16,fp8,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,128,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,0,1,float16,fp8,0,0.052154665191968284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,64,0,1,fp8,fp8,0,0.045968001087506614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,128,1,float16,fp8,0,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,128,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,float16,0,0.05003733436266581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,0,1,float16,fp8,0,0.05172266562779745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,64,0,1,fp8,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,float16,0,0.040133332212766014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,128,1,float16,fp8,0,0.041290665666262306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,float16,0,0.06259733438491821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,128,1,fp8,fp8,0,0.038133333126703896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,0,1,fp8,fp8,0,0.05827199916044871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,64,0,1,float16,fp8,0,0.06483200192451477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,128,1,float16,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,float16,0,0.04780800143877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,128,1,fp8,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,float16,0,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,64,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,float16,0,0.04390933116277059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,128,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,128,1,fp8,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,0,1,float16,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,64,0,1,fp8,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,float16,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,128,1,fp8,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,128,1,float16,fp8,0,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,0,1,float16,fp8,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,64,0,1,fp8,fp8,0,0.03786666691303253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,float16,0,0.04192000130812327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,128,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,128,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,0,1,float16,fp8,0,0.042021334171295166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,64,0,1,fp8,fp8,0,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,float16,0,0.04354666670163473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,float16,0,0.06346666812896729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,128,1,float16,fp8,0,0.04427200059096018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,128,1,fp8,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,0,1,float16,fp8,0,0.064410666624705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,float16,0,0.02926933268706004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,64,0,1,fp8,fp8,0,0.05843733251094818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,float16,0,0.04195199906826019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,128,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,128,1,fp8,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,0,1,float16,fp8,0,0.043680002291997276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,64,0,1,fp8,fp8,0,0.039919999738534294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,float16,0,0.03711466739575068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,128,1,float16,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,128,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,64,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,float16,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,128,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,128,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,0,1,float16,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,64,0,1,fp8,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,float16,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,128,1,float16,fp8,0,0.02088533341884613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,128,1,fp8,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,0,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,64,0,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,float16,0,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,float16,0,0.0334346666932106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,128,1,float16,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,128,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,0,1,float16,fp8,0,0.03345600018898646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,64,0,1,fp8,fp8,0,0.029152000943819683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,float16,0,0.048207998275756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,128,1,float16,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,0,1,float16,fp8,0,0.04929600159327189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,128,1,fp8,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,64,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,float16,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,128,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,128,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,0,1,float16,fp8,0,0.035530666510264076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,64,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,float16,0,0.0315786674618721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,128,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,128,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,64,0,1,fp8,fp8,0,0.028399998943010967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,128,1,float16,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,128,1,fp8,fp8,0,0.02089066555102666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,0,1,float16,fp8,0,0.031290667752424874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,64,0,1,fp8,fp8,0,0.027935999135176342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,float16,0,0.02932800104220708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,128,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,128,1,fp8,fp8,0,0.01969066634774208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,0,1,float16,fp8,0,0.0296426663796107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,float16,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,64,0,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,float16,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,128,1,float16,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,128,1,fp8,fp8,0,0.019754666835069656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,0,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,64,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,float16,0,0.036288000643253326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,float16,0,0.04404800136884054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,128,1,float16,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,0,1,float16,fp8,0,0.04417066772778829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,128,1,fp8,fp8,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,64,0,1,fp8,fp8,0,0.04009599983692169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,128,1,float16,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,float16,0,0.029834667841593426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,128,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,0,1,float16,fp8,0,0.030832000076770782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,64,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,float16,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,float16,0,0.02714666724205017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,128,1,float16,fp8,0,0.02236266682545344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,128,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,0,1,float16,fp8,0,0.025744001070658367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,64,0,1,fp8,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,float16,0,0.024383999407291412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,128,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,128,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,64,0,1,fp8,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,float16,0,0.024517332514127094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,128,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,128,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,64,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,float16,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,float16,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,128,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,128,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,64,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,float16,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,128,1,float16,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,128,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,0,1,float16,fp8,0,0.023498666783173878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,64,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,128,1,float16,float16,0,0.036357333262761436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,0,1,float16,float16,0,0.038032000263532005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,128,1,float16,fp8,0,0.03586133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,0,1,float16,fp8,0,0.037503999968369804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,128,1,fp8,fp8,0,0.036357333262761436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,64,0,1,fp8,fp8,0,0.03597866743803024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,128,1,float16,float16,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,0,1,float16,float16,0,0.025850666066010792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,128,1,float16,fp8,0,0.025941332181294758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,128,1,fp8,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,0,1,float16,fp8,0,0.025418666501839954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,64,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,0,1,float16,float16,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,128,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,128,1,float16,float16,0,0.022463999688625336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,128,1,fp8,fp8,0,0.0216799999276797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,64,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,128,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,128,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,128,1,fp8,fp8,0,0.020074666788180668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,0,1,float16,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,64,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,128,1,float16,float16,0,0.01960533360640208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,128,1,float16,fp8,0,0.02056533346573512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,128,1,fp8,fp8,0,0.020831999679406483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,0,1,float16,fp8,0,0.020794666061798733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,64,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,128,1,float16,float16,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,128,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,128,1,fp8,fp8,0,0.018005333840847015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,64,0,1,fp8,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,128,1,float16,float16,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,128,1,float16,fp8,0,0.019509332875410717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,128,1,fp8,fp8,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,0,1,fp8,fp8,0,0.017786666750907898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,64,0,1,float16,fp8,0,0.019674666225910187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,128,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,0,1,float16,float16,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,128,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,128,1,fp8,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,64,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,128,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,0,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,128,1,float16,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,128,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,64,0,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,128,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,0,1,float16,float16,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,128,1,float16,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,128,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,64,0,1,fp8,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,128,1,float16,float16,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,0,1,float16,float16,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,128,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,128,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,64,0,1,fp8,fp8,0,0.01773333301146825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,128,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,128,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,128,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,64,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,0,1,float16,float16,0,0.01781333362062772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,128,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,128,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,64,0,1,fp8,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,128,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,128,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,128,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,64,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,128,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,128,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,128,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,0,1,float16,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,64,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,128,1,float16,float16,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,0,1,float16,float16,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,128,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,128,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,64,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,128,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,128,1,float16,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,128,1,fp8,fp8,0,0.018330667167901993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,0,1,float16,fp8,0,0.018453333526849747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,64,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,128,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,128,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,0,1,float16,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,128,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,64,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,128,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,128,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,128,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,64,0,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,128,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,64,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,128,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,0,1,float16,float16,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,128,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,0,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,64,0,1,fp8,fp8,0,0.01624533285697301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,128,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,128,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,64,0,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,128,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,128,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,128,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,0,1,float16,fp8,0,0.01632000009218852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,128,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,64,0,1,fp8,fp8,0,0.014778666198253632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,128,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,128,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,64,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,128,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,128,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,128,1,fp8,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,0,1,float16,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,64,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,128,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,128,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,64,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,128,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,128,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,128,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,64,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,128,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,128,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,0,1,float16,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,64,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,128,1,float16,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,128,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,64,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,128,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,128,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,64,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,128,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,0,1,float16,float16,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,128,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,0,1,float16,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,64,0,1,fp8,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,128,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,128,1,float16,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,128,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,64,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,128,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,0,1,float16,float16,0,0.017583999782800674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,128,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,128,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,128,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,128,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,128,1,fp8,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,128,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,128,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,128,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,64,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,128,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,128,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,128,1,fp8,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,128,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,64,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,128,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,128,1,float16,fp8,0,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,64,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,128,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,128,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,128,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,64,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,128,1,float16,float16,0,0.014762666076421738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,128,1,float16,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,0,1,float16,float16,0,0.013914667069911957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,128,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,0,1,float16,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,64,0,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,128,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,128,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,64,0,1,fp8,fp8,0,0.015647999942302704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,128,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,0,1,float16,float16,0,0.016693333784739178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,128,1,float16,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,128,1,fp8,fp8,0,0.015919999529918034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,128,1,float16,float16,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,64,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,64,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,128,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,128,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,128,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,0,1,float16,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,128,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,64,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,128,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,64,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,128,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,128,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,64,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,128,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,0,1,float16,float16,0,0.01481066644191742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,128,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,128,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,0,1,float16,float16,0,0.01313599944114685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,128,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,128,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,0,1,float16,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,128,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,64,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,128,1,float16,float16,0,0.013221333424250284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,128,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,0,1,float16,fp8,0,0.01441066712141037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,64,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,128,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,128,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,128,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,64,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,128,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,128,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,128,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,64,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,128,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,128,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,128,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,64,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,128,1,float16,float16,0,0.01369599997997284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,128,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,128,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,64,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,128,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,128,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,128,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,64,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,128,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,0,0.014805333067973455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,128,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,128,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,64,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,128,1,float16,float16,0,0.014837333311637243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,128,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,128,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,64,0,1,fp8,fp8,0,0.014671999961137772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,128,1,float16,float16,0,0.012991999586423239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,0,0.01403733342885971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,128,1,float16,fp8,0,0.014831999937693277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,128,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,0,0.01471466695268949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,64,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,128,0,1,fp8,fp8,0,27.144363403320312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,float16,0,36.03789774576823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,1,128,0,1,float16,fp8,0,36.11066691080729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,float16,0,36.33916727701823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,128,0,1,fp8,fp8,0,26.63279978434245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,4,128,0,1,float16,fp8,0,36.32450612386068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,float16,0,36.01159413655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,128,0,1,float16,fp8,0,35.328905741373696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,float16,0,18.462117513020832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,96,8,128,0,1,fp8,fp8,0,27.11468760172526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,96,128,0,1,float16,fp8,0,18.872907002766926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,float16,0,17.918895721435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,128,0,1,fp8,fp8,0,13.555642445882162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,1,128,0,1,float16,fp8,0,18.05546696980794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,float16,0,18.433034261067707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,128,0,1,fp8,fp8,0,13.638741811116537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,4,128,0,1,float16,fp8,0,17.956016540527344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,float16,0,17.690272013346355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,128,0,1,fp8,fp8,0,13.455088297526041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,96,8,128,0,1,float16,fp8,0,18.583146413167317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,float16,0,9.376058578491211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,128,0,1,float16,fp8,0,9.759056091308594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,96,128,0,1,fp8,fp8,0,7.237925211588542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,128,0,1,fp8,fp8,0,6.935146967569987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,float16,0,9.173088073730469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,1,128,0,1,float16,fp8,0,9.072709401448568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,float16,0,8.922320048014322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,128,0,1,fp8,fp8,0,6.962490717569987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,4,128,0,1,float16,fp8,0,9.393807729085287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,float16,0,9.072336196899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,128,0,1,fp8,fp8,0,6.976559956868489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,96,8,128,0,1,float16,fp8,0,8.831855773925781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,float16,0,4.7644907633463545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,128,0,1,float16,fp8,0,4.835365295410156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,96,128,0,1,fp8,fp8,0,3.8809385299682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,128,0,1,fp8,fp8,0,3.7323999404907227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,float16,0,4.453189214070638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,1,128,0,1,float16,fp8,0,4.565818786621094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,128,0,1,fp8,fp8,0,3.7356319427490234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,float16,0,4.618101437886556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,4,128,0,1,float16,fp8,0,4.601807912190755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,float16,0,4.533450762430827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,128,0,1,fp8,fp8,0,3.7376426060994468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,96,8,128,0,1,float16,fp8,0,4.673136075337728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,128,0,1,fp8,fp8,0,16.05728022257487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,fp8,0,20.087744394938152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,1,128,0,1,float16,float16,0,20.274917602539062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,128,0,1,fp8,fp8,0,15.877530415852865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,float16,0,20.50812276204427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,4,128,0,1,float16,fp8,0,20.207312266031902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,128,0,1,fp8,fp8,0,16.07806905110677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,float16,0,20.41638437906901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,96,8,128,0,1,float16,fp8,0,19.973220825195312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,128,0,1,fp8,fp8,0,8.645733515421549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,float16,0,11.35040028889974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,96,128,0,1,float16,fp8,0,11.167743682861328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,128,0,1,fp8,fp8,0,8.060629526774088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,float16,0,10.55842145284017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,1,128,0,1,float16,fp8,0,10.56058120727539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,128,0,1,fp8,fp8,0,8.090864181518555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,float16,0,10.356437047322592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,4,128,0,1,float16,fp8,0,10.67413330078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,128,0,1,fp8,fp8,0,8.079530715942383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,float16,0,10.48039436340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,96,8,128,0,1,float16,fp8,0,10.712443033854166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,float16,0,5.651338577270508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,128,0,1,fp8,fp8,0,4.4581600824991865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,96,128,0,1,float16,fp8,0,6.0499521891276045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,float16,0,5.271509488423665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,128,0,1,fp8,fp8,0,4.20304520924886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,1,128,0,1,float16,fp8,0,5.206000010172526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,128,0,1,fp8,fp8,0,4.2266238530476885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,float16,0,5.42843755086263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,4,128,0,1,float16,fp8,0,5.223589261372884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,128,0,1,fp8,fp8,0,4.237039883931478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,float16,0,5.298437436421712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,96,8,128,0,1,float16,fp8,0,5.61359977722168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,float16,0,2.901439984639486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,128,0,1,fp8,fp8,0,2.4399520556131997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,96,128,0,1,float16,fp8,0,2.9265387852986655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,float16,0,2.733375867207845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,128,0,1,float16,fp8,0,2.7170400619506836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,1,128,0,1,fp8,fp8,0,2.320741335550944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,float16,0,2.797178586324056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,128,0,1,float16,fp8,0,2.7442506154378257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,4,128,0,1,fp8,fp8,0,2.351909319559733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,float16,0,2.7649014790852866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,128,0,1,float16,fp8,0,2.769322713216146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,96,8,128,0,1,fp8,fp8,0,2.33353598912557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,128,0,1,fp8,fp8,0,11.457893371582031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,float16,0,14.570927937825521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,1,128,0,1,float16,fp8,0,14.67409642537435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,128,0,1,fp8,fp8,0,11.689664204915365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,float16,0,15.274650573730469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,4,128,0,1,float16,fp8,0,15.401908874511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,float16,0,15.131701151529947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,128,0,1,fp8,fp8,0,11.587679545084635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,96,8,128,0,1,float16,fp8,0,15.040740966796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,128,0,1,fp8,fp8,0,6.28166389465332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,float16,0,8.121642430623373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,96,128,0,1,float16,fp8,0,8.035018920898438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,128,0,1,fp8,fp8,0,5.9521439870198565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,float16,0,7.335376103719075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,1,128,0,1,float16,fp8,0,7.369418462117513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,float16,0,7.511903762817383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,128,0,1,fp8,fp8,0,5.884954452514648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,4,128,0,1,float16,fp8,0,7.498463948567708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,128,0,1,fp8,fp8,0,5.942202885945638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,fp8,0,7.513674418131511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,96,8,128,0,1,float16,float16,0,7.395647684733073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,float16,0,4.043557484944661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,128,0,1,fp8,fp8,0,3.3328959147135415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,96,128,0,1,float16,fp8,0,4.16702397664388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,float16,0,3.8030080795288086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,128,0,1,float16,fp8,0,3.806485176086426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,1,128,0,1,fp8,fp8,0,3.107802708943685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,128,0,1,fp8,fp8,0,3.1267147064208984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,float16,0,3.7683092753092446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,4,128,0,1,float16,fp8,0,3.813098589579264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,float16,0,3.7566134134928384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,128,0,1,fp8,fp8,0,3.135242780049642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,96,8,128,0,1,float16,fp8,0,3.8150294621785483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,float16,0,2.1745333671569824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,128,0,1,float16,fp8,0,2.206826686859131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,96,128,0,1,fp8,fp8,0,2.0123680432637534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,128,0,1,fp8,fp8,0,1.7389119466145833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,fp8,0,1.9964799880981445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,1,128,0,1,float16,float16,0,2.0132106145222983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,128,0,1,fp8,fp8,0,1.7453920046488445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,fp8,0,2.011077404022217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,4,128,0,1,float16,float16,0,2.041898727416992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,128,0,1,fp8,fp8,0,1.796229362487793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,float16,0,2.0321332613627114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,96,8,128,0,1,float16,fp8,0,2.0646559397379556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,128,0,1,fp8,fp8,0,15.592027028401693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,float16,0,20.052613576253254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,1,128,0,1,float16,fp8,0,20.115285237630207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,float16,0,19.74179204305013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,128,0,1,fp8,fp8,0,15.71018091837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,4,128,0,1,float16,fp8,0,20.82906723022461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,float16,0,19.9375737508138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,float16,0,10.650480270385742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,128,0,1,fp8,fp8,0,15.77340825398763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,96,128,0,1,float16,fp8,0,11.046629587809244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,96,8,128,0,1,float16,fp8,0,19.710277557373047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,float16,0,10.145146687825521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,128,0,1,float16,fp8,0,10.281978607177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,1,128,0,1,fp8,fp8,0,7.84007453918457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,fp8,0,9.750965118408203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,128,0,1,float16,float16,0,10.104554494222006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,4,128,0,1,fp8,fp8,0,7.857840220133464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,fp8,0,9.833983739217123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,128,0,1,float16,float16,0,10.042373021443685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,96,8,128,0,1,fp8,fp8,0,7.887952168782552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,float16,0,5.436666488647461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,128,0,1,float16,fp8,0,5.510800043741862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,96,128,0,1,fp8,fp8,0,4.375162760416667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,float16,0,5.001887957255046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,128,0,1,float16,fp8,0,5.05895455678304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,1,128,0,1,fp8,fp8,0,4.04965337117513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,float16,0,5.096170743306478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,128,0,1,float16,fp8,0,5.023530642191569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,4,128,0,1,fp8,fp8,0,4.065626780192058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,float16,0,5.075215975443522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,128,0,1,float16,fp8,0,5.0629011789957685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,96,8,128,0,1,fp8,fp8,0,4.0794986089070635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,float16,0,2.8482453028361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,128,0,1,float16,fp8,0,2.8695360819498696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,96,128,0,1,fp8,fp8,0,2.3370772997538247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,float16,0,2.5881600379943848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,128,0,1,fp8,fp8,0,2.172191937764486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,1,128,0,1,float16,fp8,0,2.603461265563965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,float16,0,2.6060694058736167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,128,0,1,float16,fp8,0,2.5810720125834146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,4,128,0,1,fp8,fp8,0,2.185706615447998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,128,0,1,fp8,fp8,0,2.187605381011963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,fp8,0,2.575551986694336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,96,8,128,0,1,float16,float16,0,2.6048693656921387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,float16,0,1.5378665924072266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,128,0,1,float16,fp8,0,1.5518933931986492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,96,128,0,1,fp8,fp8,0,1.3124799728393555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,float16,0,1.4090666770935059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,128,0,1,float16,fp8,0,1.4160265922546387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,1,128,0,1,fp8,fp8,0,1.2314026355743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,float16,0,1.4264853795369465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,128,0,1,float16,fp8,0,1.4213973681132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,4,128,0,1,fp8,fp8,0,1.2367200056711833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,float16,0,1.419925371805827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,128,0,1,float16,fp8,0,1.429898738861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,96,8,128,0,1,fp8,fp8,0,1.2391733328501384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,128,0,1,fp8,fp8,0,9.65887451171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,float16,0,11.65219243367513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,1,128,0,1,float16,fp8,0,12.036351521809896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,128,0,1,fp8,fp8,0,9.703866958618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,float16,0,11.847381591796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,4,128,0,1,float16,fp8,0,11.968858083089193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,128,0,1,fp8,fp8,0,9.743408203125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,float16,0,12.078266143798828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,96,8,128,0,1,float16,fp8,0,12.035748799641928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,float16,0,6.608757019042969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,128,0,1,fp8,fp8,0,5.462453206380208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,96,128,0,1,float16,fp8,0,6.764533360799153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,float16,0,5.910896301269531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,128,0,1,fp8,fp8,0,4.907253265380859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,1,128,0,1,float16,fp8,0,5.919333140055339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,float16,0,5.883850733439128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,128,0,1,fp8,fp8,0,4.942069371541341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,4,128,0,1,float16,fp8,0,5.993466695149739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,128,0,1,fp8,fp8,0,4.945957183837891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,fp8,0,6.105632146199544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,96,8,128,0,1,float16,float16,0,5.9319413503011065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,128,0,1,fp8,fp8,0,2.8190345764160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,float16,0,3.3944053649902344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,96,128,0,1,float16,fp8,0,3.4392267862955728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,float16,0,3.0453227361043296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,128,0,1,float16,fp8,0,3.042597452799479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,1,128,0,1,fp8,fp8,0,2.5699146588643393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,128,0,1,fp8,fp8,0,2.5662506421407065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,float16,0,3.0539573033650718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,4,128,0,1,float16,fp8,0,3.0872586568196616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,float16,0,3.0700906117757163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,128,0,1,fp8,fp8,0,2.5758773485819497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,96,8,128,0,1,float16,fp8,0,3.088346799214681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,float16,0,1.8008640607198079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,128,0,1,float16,fp8,0,1.8045652707417805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,96,128,0,1,fp8,fp8,0,1.5426079432169597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,128,0,1,fp8,fp8,0,1.391983985900879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,float16,0,1.6035092671712239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,1,128,0,1,float16,fp8,0,1.616005261739095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,float16,0,1.6159626642862956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,128,0,1,fp8,fp8,0,1.4107093811035156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,4,128,0,1,float16,fp8,0,1.616085370381673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,float16,0,1.6228906313578289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,128,0,1,float16,fp8,0,1.6338186264038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,96,8,128,0,1,fp8,fp8,0,1.3983999888102214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,128,0,1,fp8,fp8,0,0.8699893156687418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,float16,0,0.996293306350708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,96,128,0,1,float16,fp8,0,1.0400479634602864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,float16,0,0.9087573687235514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,128,0,1,float16,fp8,0,0.9118613402048746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,1,128,0,1,fp8,fp8,0,0.8100746472676595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,128,0,1,fp8,fp8,0,0.812058687210083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,float16,0,0.9153439998626709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,4,128,0,1,float16,fp8,0,0.9183839956919352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,128,0,1,fp8,fp8,0,0.8138879934946696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,float16,0,0.9217653274536133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,96,8,128,0,1,float16,fp8,0,0.921615997950236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,128,0,1,fp8,fp8,0,10.207626978556315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,float16,0,12.103978474934896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,1,128,0,1,float16,fp8,0,12.127024332682291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,128,0,1,fp8,fp8,0,10.25493303934733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,float16,0,12.232964833577475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,4,128,0,1,float16,fp8,0,12.326693216959635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,128,0,1,fp8,fp8,0,10.310127894083658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,float16,0,12.417364756266275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,96,8,128,0,1,float16,fp8,0,12.547684987386068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,float16,0,6.998325347900391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,96,128,0,1,float16,fp8,0,7.019509633382161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,float16,0,5.994309107462565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,128,0,1,float16,fp8,0,6.083589553833008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,1,128,0,1,fp8,fp8,0,5.091642697652181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,float16,0,6.035306930541992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,128,0,1,fp8,fp8,0,5.115349451700847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,4,128,0,1,float16,fp8,0,6.176736195882161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,float16,0,6.11628786722819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,128,0,1,float16,fp8,0,6.108240127563477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,float16,0,3.4992478688557944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,96,8,128,0,1,fp8,fp8,0,5.152688026428223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,128,0,1,float16,fp8,0,3.5507465998331704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,96,128,0,1,fp8,fp8,0,3.001194636027018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,float16,0,3.043349266052246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,128,0,1,float16,fp8,0,3.0700372060139975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,1,128,0,1,fp8,fp8,0,2.6194507280985513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,float16,0,3.116671880086263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,128,0,1,float16,fp8,0,3.091189384460449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,4,128,0,1,fp8,fp8,0,2.630095958709717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,float16,0,3.135749181111654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,128,0,1,float16,fp8,0,3.1222826639811196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,96,8,128,0,1,fp8,fp8,0,2.6473546028137207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,float16,0,1.824021339416504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,128,0,1,float16,fp8,0,1.8521119753519695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,96,128,0,1,fp8,fp8,0,1.5733973185221355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,float16,0,1.593930721282959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,128,0,1,float16,fp8,0,1.599679946899414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,1,128,0,1,fp8,fp8,0,1.427610715230306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,float16,0,1.6068800290425618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,128,0,1,float16,fp8,0,1.605141321818034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,4,128,0,1,fp8,fp8,0,1.3920159339904785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,float16,0,1.6114932696024578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,128,0,1,float16,fp8,0,1.624901294708252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,96,8,128,0,1,fp8,fp8,0,1.3982666333516438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,float16,0,0.9881546497344971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,128,0,1,float16,fp8,0,1.0000747044881184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,96,128,0,1,fp8,fp8,0,0.8630506992340088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,float16,0,0.8705440362294515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,128,0,1,float16,fp8,0,0.8760426839192709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,1,128,0,1,fp8,fp8,0,0.7719253698984782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,float16,0,0.8798399766286215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,128,0,1,float16,fp8,0,0.8817760149637858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,4,128,0,1,fp8,fp8,0,0.7737546761830648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,float16,0,0.8829013506571451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,128,0,1,float16,fp8,0,0.8888373374938965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,float16,0,0.5680960019429525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,96,8,128,0,1,fp8,fp8,0,0.7791573206583658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,128,0,1,fp8,fp8,0,0.5062666734059652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,96,128,0,1,float16,fp8,0,0.5784266789754232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,float16,0,0.5122559865315756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,128,0,1,float16,fp8,0,0.5124479929606119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,1,128,0,1,fp8,fp8,0,0.463370680809021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,float16,0,0.5175679922103882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,128,0,1,float16,fp8,0,0.5178560018539429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,4,128,0,1,fp8,fp8,0,0.4644800027211507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,float16,0,0.5174986521402994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,128,0,1,float16,fp8,0,0.5206986665725708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,96,8,128,0,1,fp8,fp8,0,0.4660586516062419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,float16,0,7.7770029703776045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,128,0,1,float16,fp8,0,7.723930358886719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,1,128,0,1,fp8,fp8,0,6.611183802286784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,float16,0,7.752927780151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,128,0,1,float16,fp8,0,7.816752115885417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,4,128,0,1,fp8,fp8,0,6.640207926432292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,float16,0,7.884880065917969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,128,0,1,float16,fp8,0,7.903839747111003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,float16,0,4.445167859395345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,96,8,128,0,1,fp8,fp8,0,6.6876481374104815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,128,0,1,float16,fp8,0,4.507882754007976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,96,128,0,1,fp8,fp8,0,3.8312479654947915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,float16,0,3.8487094243367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,128,0,1,float16,fp8,0,3.8814560572306314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,1,128,0,1,fp8,fp8,0,3.3232320149739585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,float16,0,3.903125445048014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,128,0,1,float16,fp8,0,3.891119956970215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,4,128,0,1,fp8,fp8,0,3.3386398951212564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,float16,0,3.917210578918457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,128,0,1,float16,fp8,0,3.932122548421224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,96,8,128,0,1,fp8,fp8,0,3.361776034037272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,float16,0,2.280384063720703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,128,0,1,fp8,fp8,0,1.9820159276326497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,96,128,0,1,float16,fp8,0,2.3146400451660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,float16,0,1.9836106300354004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,128,0,1,float16,fp8,0,1.9828853607177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,1,128,0,1,fp8,fp8,0,1.723482608795166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,float16,0,1.9987093607584636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,128,0,1,float16,fp8,0,2.003722667694092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,4,128,0,1,fp8,fp8,0,1.7371519406636555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,float16,0,2.014047940572103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,128,0,1,float16,fp8,0,2.0166452725728354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,96,8,128,0,1,fp8,fp8,0,1.7481865882873535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,float16,0,1.1959786415100098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,128,0,1,float16,fp8,0,1.2194080352783203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,96,128,0,1,fp8,fp8,0,1.0481066703796387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,float16,0,1.050437370936076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,128,0,1,float16,fp8,0,1.0501493612925212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,1,128,0,1,fp8,fp8,0,0.9208426475524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,float16,0,1.0532693068186443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,128,0,1,fp8,fp8,0,0.9244639873504639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,4,128,0,1,float16,fp8,0,1.0605706373850505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,float16,0,1.0622986952463787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,128,0,1,float16,fp8,0,1.0704320271809895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,float16,0,0.6600853204727173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,96,8,128,0,1,fp8,fp8,0,0.9291093349456787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,128,0,1,float16,fp8,0,0.6715786457061768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,96,128,0,1,fp8,fp8,0,0.5842399994532267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,float16,0,0.5847786664962769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,128,0,1,float16,fp8,0,0.5833653211593628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,1,128,0,1,fp8,fp8,0,0.521232008934021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,float16,0,0.5872746706008911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,128,0,1,float16,fp8,0,0.5903253157933553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,4,128,0,1,fp8,fp8,0,0.5231200059254965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,float16,0,0.5946613152821859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,128,0,1,float16,fp8,0,0.5947253306706747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,float16,0,0.38993601004282635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,96,8,128,0,1,fp8,fp8,0,0.5262506802876791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,128,0,1,float16,fp8,0,0.39643200238545734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,96,128,0,1,fp8,fp8,0,0.35256532828013104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,float16,0,0.3489866654078166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,128,0,1,float16,fp8,0,0.35050666332244873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,1,128,0,1,fp8,fp8,0,0.3206826647122701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,float16,0,0.35181868076324463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,128,0,1,float16,fp8,0,0.3519359827041626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,4,128,0,1,fp8,fp8,0,0.32256533702214557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,float16,0,0.35602664947509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,128,0,1,float16,fp8,0,0.35524264971415204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,96,8,128,0,1,fp8,fp8,0,0.3226026693979899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,float16,0,8.547349294026693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,128,0,1,float16,fp8,0,8.63693873087565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,1,128,0,1,fp8,fp8,0,7.454245249430339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,fp8,0,8.691226959228516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,128,0,1,fp8,fp8,0,7.504778544108073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,4,128,0,1,float16,float16,0,8.550597508748373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,float16,0,8.72486941019694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,128,0,1,float16,fp8,0,8.715759913126627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,float16,0,4.98140811920166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,128,0,1,float16,fp8,0,5.034293174743652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,96,128,0,1,fp8,fp8,0,4.345589319864909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,float16,0,4.256138801574707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,96,8,128,0,1,fp8,fp8,0,7.555893580118815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,128,0,1,fp8,fp8,0,3.704421361287435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,1,128,0,1,float16,fp8,0,4.274298667907715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,float16,0,4.23964786529541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,128,0,1,fp8,fp8,0,3.723322550455729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,4,128,0,1,float16,fp8,0,4.286704063415527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,float16,0,4.288922627766927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,128,0,1,fp8,fp8,0,3.7529919942220054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,96,8,128,0,1,float16,fp8,0,4.33677323659261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,float16,0,2.5146986643473306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,128,0,1,fp8,fp8,0,2.215328057607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,96,128,0,1,float16,fp8,0,2.560959974924723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,float16,0,2.1467040379842124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,128,0,1,fp8,fp8,0,1.8905919392903645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,1,128,0,1,float16,fp8,0,2.1723413467407227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,float16,0,2.1591307322184243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,128,0,1,fp8,fp8,0,1.9007093111673992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,4,128,0,1,float16,fp8,0,2.1816107432047525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,float16,0,2.182197411855062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,128,0,1,fp8,fp8,0,1.9174985885620117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,96,8,128,0,1,float16,fp8,0,2.2128052711486816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,float16,0,1.2992586294809978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,128,0,1,fp8,fp8,0,1.1572373708089192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,96,128,0,1,float16,fp8,0,1.3290826479593914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,float16,0,1.1186880270640056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,128,0,1,fp8,fp8,0,0.9918293158213297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,1,128,0,1,float16,fp8,0,1.1305973529815674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,float16,0,1.1246826648712158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,128,0,1,fp8,fp8,0,0.994762659072876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,4,128,0,1,float16,fp8,0,1.1355093320210774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,float16,0,1.1358506679534912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,float16,0,0.6919146378835043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,128,0,1,fp8,fp8,0,1.0014293193817139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,96,8,128,0,1,float16,fp8,0,1.1471786499023438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,128,0,1,fp8,fp8,0,0.6222613255182902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,float16,0,0.6048853397369385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,128,0,1,float16,fp8,0,0.6059199968973795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,1,128,0,1,fp8,fp8,0,0.5391519864400228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,float16,0,0.6080053249994913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,128,0,1,float16,fp8,0,0.6134560108184814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,4,128,0,1,fp8,fp8,0,0.5411253372828165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,float16,0,0.6127573251724243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,128,0,1,float16,fp8,0,0.6170293490091959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,8,128,0,1,fp8,fp8,0,0.5435200134913126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,96,96,128,0,1,float16,fp8,0,0.705946683883667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,float16,0,0.3925333420435588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,128,0,1,float16,fp8,0,0.40014398097991943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,96,128,0,1,fp8,fp8,0,0.35313065846761066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,float16,0,0.34513068199157715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,128,0,1,float16,fp8,0,0.3442933162053426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,1,128,0,1,fp8,fp8,0,0.31249600648880005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,float16,0,0.3469280004501343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,128,0,1,float16,fp8,0,0.3507680098215739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,4,128,0,1,fp8,fp8,0,0.31441599130630493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,float16,0,0.34860801696777344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,128,0,1,float16,fp8,0,0.35286398728688556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,96,8,128,0,1,fp8,fp8,0,0.3163306713104248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,float16,0,0.23957333962122598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,128,0,1,float16,fp8,0,0.24489599466323853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,96,128,0,1,fp8,fp8,0,0.21808000405629477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,float16,0,0.20670932531356812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,128,0,1,float16,fp8,0,0.2090346614519755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,1,128,0,1,fp8,fp8,0,0.19556800524393717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,float16,0,0.20851733287175497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,128,0,1,float16,fp8,0,0.21080533663431802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,4,128,0,1,fp8,fp8,0,0.19687465826670328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,float16,0,0.21115734179814658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,128,0,1,float16,fp8,0,0.21208532651265463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,96,8,128,0,1,fp8,fp8,0,0.1988746722539266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,128,0,1,fp8,fp8,0,5.062597274780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,float16,0,5.769450505574544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,1,128,0,1,float16,fp8,0,5.8140106201171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,128,0,1,fp8,fp8,0,5.090234756469727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,float16,0,5.809765497843425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,4,128,0,1,float16,fp8,0,5.849168141682942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,128,0,1,fp8,fp8,0,5.126282691955566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,float16,0,5.8951466878255205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,96,8,128,0,1,float16,fp8,0,5.905322392781575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,128,0,1,fp8,fp8,0,2.9988800684611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,float16,0,3.400266647338867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,96,128,0,1,float16,fp8,0,3.4497814178466797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,float16,0,2.876607894897461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,128,0,1,float16,fp8,0,2.8947839736938477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,1,128,0,1,fp8,fp8,0,2.5297066370646157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,float16,0,2.9045279820760093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,128,0,1,float16,fp8,0,2.9130026499430337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,4,128,0,1,fp8,fp8,0,2.540815989176432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,float16,0,2.940080006917318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,128,0,1,fp8,fp8,0,2.566549301147461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,96,8,128,0,1,float16,fp8,0,2.9519519805908203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,float16,0,1.731584072113037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,128,0,1,float16,fp8,0,1.7595787048339844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,96,128,0,1,fp8,fp8,0,1.5355146725972493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,float16,0,1.4763414065043132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,128,0,1,fp8,fp8,0,1.2994879881540935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,1,128,0,1,float16,fp8,0,1.4807626406351726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,float16,0,1.4871412913004558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,128,0,1,fp8,fp8,0,1.3050346374511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,4,128,0,1,float16,fp8,0,1.4903839429219563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,float16,0,1.5059092839558919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,128,0,1,fp8,fp8,0,1.318949302037557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,96,8,128,0,1,float16,fp8,0,1.5130133628845215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,float16,0,0.898517370223999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,128,0,1,fp8,fp8,0,0.8078186511993408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,96,128,0,1,float16,fp8,0,0.9191679954528809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,float16,0,0.7744853496551514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,128,0,1,fp8,fp8,0,0.6856106917063395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,1,128,0,1,float16,fp8,0,0.7765279610951742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,float16,0,0.7788053353627523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,128,0,1,float16,fp8,0,0.782474676767985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,4,128,0,1,fp8,fp8,0,0.6882346471150717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,float16,0,0.7879040241241455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,128,0,1,fp8,fp8,0,0.6943626403808594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,96,8,128,0,1,float16,fp8,0,0.7910239696502686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,float16,0,0.4862613280614217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,128,0,1,fp8,fp8,0,0.43640534083048504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,96,128,0,1,float16,fp8,0,0.49562664826711017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,float16,0,0.4220106601715088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,128,0,1,fp8,fp8,0,0.3780213197072347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,1,128,0,1,float16,fp8,0,0.4208639860153198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,float16,0,0.4254719813664754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,128,0,1,fp8,fp8,0,0.37885332107543945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,4,128,0,1,float16,fp8,0,0.4242986838022868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,float16,0,0.4292106628417969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,128,0,1,float16,fp8,0,0.4293653170267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,96,8,128,0,1,fp8,fp8,0,0.38129599889119464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,float16,0,0.2781013250350952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,128,0,1,fp8,fp8,0,0.25282132625579834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,96,128,0,1,float16,fp8,0,0.28294400374094647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,float16,0,0.24123199780782065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,128,0,1,float16,fp8,0,0.24038932720820108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,1,128,0,1,fp8,fp8,0,0.22217067082722983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,float16,0,0.24452267090479532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,128,0,1,fp8,fp8,0,0.22304532925287882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,4,128,0,1,float16,fp8,0,0.24286933739980063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,float16,0,0.24629332621892294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,128,0,1,float16,fp8,0,0.2461013396581014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,96,8,128,0,1,fp8,fp8,0,0.22574400901794434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,float16,0,0.1743519902229309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,128,0,1,fp8,fp8,0,0.16063466668128967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,96,128,0,1,float16,fp8,0,0.17707200845082602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,float16,0,0.15185599525769553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,128,0,1,fp8,fp8,0,0.139957328637441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,1,128,0,1,float16,fp8,0,0.15254933635393778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,float16,0,0.15075733264287314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,128,0,1,float16,fp8,0,0.15218666195869446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,4,128,0,1,fp8,fp8,0,0.13958932956059775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,float16,0,0.15093866984049478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,128,0,1,float16,fp8,0,0.15268266201019287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,96,8,128,0,1,fp8,fp8,0,0.14054399728775024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,128,0,1,fp8,fp8,0,5.685269037882487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,fp8,0,6.3326772054036455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,1,128,0,1,float16,float16,0,6.49348258972168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,float16,0,6.562869389851888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,128,0,1,float16,fp8,0,6.538933436075847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,4,128,0,1,fp8,fp8,0,6.586896260579427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,float16,0,6.67082150777181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,128,0,1,fp8,fp8,0,6.305226643880208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,96,8,128,0,1,float16,fp8,0,6.645280202229817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,float16,0,3.8874025344848633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,96,128,0,1,float16,fp8,0,3.7916959126790366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,float16,0,3.1151040395100913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,128,0,1,float16,fp8,0,3.11079470316569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,1,128,0,1,fp8,fp8,0,2.858565330505371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,float16,0,3.1756054560343423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,128,0,1,float16,fp8,0,3.222112019856771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,4,128,0,1,fp8,fp8,0,3.2603785196940103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,float16,0,3.313173294067383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,128,0,1,float16,fp8,0,3.3302666346232095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,96,8,128,0,1,fp8,fp8,0,3.0624958674112954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,float16,0,1.9001866976420085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,128,0,1,float16,fp8,0,1.8869013786315918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,96,128,0,1,fp8,fp8,0,1.825925350189209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,float16,0,1.5670347213745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,128,0,1,float16,fp8,0,1.5685920715332031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,1,128,0,1,fp8,fp8,0,1.4402027130126953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,float16,0,1.5883946418762207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,128,0,1,float16,fp8,0,1.58734925587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,4,128,0,1,fp8,fp8,0,1.5655147234598796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,float16,0,1.6108585993448894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,128,0,1,float16,fp8,0,1.6229813893636067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,96,8,128,0,1,fp8,fp8,0,1.5004693667093914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,float16,0,0.9648693402608236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,128,0,1,float16,fp8,0,0.9455040295918783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,96,128,0,1,fp8,fp8,0,0.9127093156178793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,float16,0,0.7985119819641113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,128,0,1,float16,fp8,0,0.8000586827596029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,1,128,0,1,fp8,fp8,0,0.7341759999593099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,float16,0,0.8064373334248861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,128,0,1,float16,fp8,0,0.8104426860809326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,4,128,0,1,fp8,fp8,0,0.7409546375274658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,float16,0,0.8180053234100342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,128,0,1,float16,fp8,0,0.81987198193868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,float16,0,0.49885865052541095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,96,8,128,0,1,fp8,fp8,0,0.7556479771931967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,128,0,1,float16,fp8,0,0.4864000082015991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,96,128,0,1,fp8,fp8,0,0.4694986740748088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,float16,0,0.4142560164133708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,128,0,1,float16,fp8,0,0.41249601046244305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,1,128,0,1,fp8,fp8,0,0.37621867656707764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,float16,0,0.4168586730957031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,128,0,1,float16,fp8,0,0.4184853235880534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,4,128,0,1,fp8,fp8,0,0.3811093171437581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,float16,0,0.423253337542216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,128,0,1,float16,fp8,0,0.42313599586486816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,96,8,128,0,1,fp8,fp8,0,0.38863468170166016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,float16,0,0.2648746569951375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,128,0,1,float16,fp8,0,0.2588106592496236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,96,128,0,1,fp8,fp8,0,0.24758400519688925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,float16,0,0.22038400173187256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,128,0,1,float16,fp8,0,0.21965332825978598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,1,128,0,1,fp8,fp8,0,0.19956799348195395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,float16,0,0.22246932983398438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,128,0,1,float16,fp8,0,0.22360533475875854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,4,128,0,1,fp8,fp8,0,0.20249066750208536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,float16,0,0.2243679960568746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,128,0,1,float16,fp8,0,0.2249173323313395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,96,8,128,0,1,fp8,fp8,0,0.2066719929377238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,float16,0,0.14692266782124838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,128,0,1,float16,fp8,0,0.14526933431625366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,96,128,0,1,fp8,fp8,0,0.1381333371003469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,float16,0,0.11962133646011353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,128,0,1,float16,fp8,0,0.11920533577601115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,1,128,0,1,fp8,fp8,0,0.10965333382288615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,float16,0,0.12020267049471538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,128,0,1,float16,fp8,0,0.1202346682548523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,4,128,0,1,fp8,fp8,0,0.11173333724339803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,float16,0,0.12072533369064331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,128,0,1,float16,fp8,0,0.1202880044778188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,96,8,128,0,1,fp8,fp8,0,0.11354133486747742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,float16,0,0.08683733145395915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,128,0,1,float16,fp8,0,0.0848533312479655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,96,128,0,1,fp8,fp8,0,0.08343999584515889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,float16,0,0.07190399865309398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,128,0,1,float16,fp8,0,0.07205866773923238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,1,128,0,1,fp8,fp8,0,0.06365866462389629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,float16,0,0.070592001080513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,128,0,1,float16,fp8,0,0.0709440012772878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,4,128,0,1,fp8,fp8,0,0.06434133152167003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,float16,0,0.07090133428573608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,128,0,1,float16,fp8,0,0.07155733307202657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,96,8,128,0,1,fp8,fp8,0,0.06413866579532623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,128,0,1,float16,fp8,0,5.57426643371582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,128,0,1,float16,float16,0,5.689114888509114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,1,128,0,1,fp8,fp8,0,5.074816068013509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,128,0,1,float16,float16,0,5.769269307454427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,128,0,1,float16,fp8,0,5.774778366088867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,4,128,0,1,fp8,fp8,0,5.847077051798503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,128,0,1,float16,float16,0,5.815701166788737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,128,0,1,float16,fp8,0,5.876773198445638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,128,0,1,float16,float16,0,3.432880083719889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,96,128,0,1,float16,fp8,0,3.3312158584594727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,128,0,1,float16,float16,0,2.727754592895508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,96,8,128,0,1,fp8,fp8,0,5.5854237874348955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,128,0,1,float16,fp8,0,2.740309397379557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,1,128,0,1,fp8,fp8,0,2.507930596669515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,128,0,1,float16,float16,0,2.8281119664510093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,128,0,1,fp8,fp8,0,2.903008143107096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,128,0,1,float16,float16,0,2.9234933853149414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,4,128,0,1,float16,fp8,0,2.8554347356160483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,128,0,1,float16,fp8,0,2.929178555806478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,96,8,128,0,1,fp8,fp8,0,2.784597396850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,128,0,1,float16,float16,0,1.6638293266296387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,128,0,1,float16,fp8,0,1.668501377105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,96,128,0,1,fp8,fp8,0,1.6295679410298665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,128,0,1,float16,fp8,0,1.3767786026000977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,128,0,1,fp8,fp8,0,1.2653546333312988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,1,128,0,1,float16,float16,0,1.3793973922729492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,128,0,1,float16,float16,0,1.3947946230570476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,128,0,1,fp8,fp8,0,1.4265546798706055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,4,128,0,1,float16,fp8,0,1.3952693939208984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,128,0,1,float16,float16,0,1.4095199902852376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,128,0,1,float16,fp8,0,1.4189972877502441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,128,0,1,float16,float16,0,0.8443360328674316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,96,8,128,0,1,fp8,fp8,0,1.3227252960205078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,128,0,1,float16,fp8,0,0.8272266387939453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,96,128,0,1,fp8,fp8,0,0.820799986521403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,128,0,1,float16,float16,0,0.7006879647572836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,128,0,1,float16,fp8,0,0.7003466288248698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,1,128,0,1,fp8,fp8,0,0.6427093346913656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,128,0,1,float16,float16,0,0.7060800393422445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,128,0,1,float16,fp8,0,0.7093013127644857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,4,128,0,1,fp8,fp8,0,0.6523199876149496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,128,0,1,float16,float16,0,0.7166186968485514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,128,0,1,float16,fp8,0,0.7200373013814291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,96,8,128,0,1,fp8,fp8,0,0.6660159826278687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,128,0,1,float16,float16,0,0.4370559851328532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,128,0,1,float16,fp8,0,0.4249653418858846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,96,128,0,1,fp8,fp8,0,0.42001068592071533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,128,0,1,float16,float16,0,0.3608800172805786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,128,0,1,float16,fp8,0,0.3643466631571452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,1,128,0,1,fp8,fp8,0,0.32843200365702313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,128,0,1,float16,float16,0,0.36535998185475665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,128,0,1,float16,fp8,0,0.36534400780995685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,4,128,0,1,fp8,fp8,0,0.33368531862894696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,128,0,1,float16,float16,0,0.37027732531229657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,128,0,1,float16,fp8,0,0.37087468306223553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,96,8,128,0,1,fp8,fp8,0,0.3426186641057332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,128,0,1,float16,float16,0,0.23275200525919595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,128,0,1,float16,fp8,0,0.22696000337600708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,96,128,0,1,fp8,fp8,0,0.22312533855438232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,128,0,1,float16,float16,0,0.1923253337542216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,128,0,1,float16,fp8,0,0.19398399194081625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,1,128,0,1,fp8,fp8,0,0.17468800147374472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,128,0,1,float16,float16,0,0.19433599710464478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,128,0,1,float16,fp8,0,0.19393066565195718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,4,128,0,1,fp8,fp8,0,0.17718400557835898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,128,0,1,float16,float16,0,0.1960960030555725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,128,0,1,float16,fp8,0,0.19695999224980673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,96,8,128,0,1,fp8,fp8,0,0.1821333368619283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,128,0,1,float16,float16,0,0.12814399600028992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,128,0,1,float16,fp8,0,0.1264479955037435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,128,0,1,float16,float16,0,0.1034879982471466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,96,128,0,1,fp8,fp8,0,0.1223466694355011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,128,0,1,float16,fp8,0,0.1034879982471466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,1,128,0,1,fp8,fp8,0,0.09501333038012187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,128,0,1,float16,float16,0,0.10553600390752156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,128,0,1,float16,fp8,0,0.10499733686447144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,4,128,0,1,fp8,fp8,0,0.09758933385213216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,128,0,1,float16,float16,0,0.1055413285891215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,128,0,1,float16,fp8,0,0.10661333799362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,96,8,128,0,1,fp8,fp8,0,0.09948800007502238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,128,0,1,float16,float16,0,0.07461333274841309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,128,0,1,float16,fp8,0,0.07321066657702129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,96,128,0,1,fp8,fp8,0,0.07411199808120728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,128,0,1,float16,float16,0,0.06276800235112508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,128,0,1,float16,fp8,0,0.06211199859778086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,1,128,0,1,fp8,fp8,0,0.05629866818586985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,128,0,1,float16,float16,0,0.06258666515350342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,128,0,1,float16,fp8,0,0.06214400132497152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,4,128,0,1,fp8,fp8,0,0.056176001826922096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,128,0,1,float16,float16,0,0.062218666076660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,96,8,128,0,1,fp8,fp8,0,0.05541866521040598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,128,0,1,float16,float16,0,0.045706664522488914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,128,0,1,float16,fp8,0,0.04364266494909922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,96,128,0,1,fp8,fp8,0,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,128,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,128,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,1,128,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,128,0,1,float16,float16,0,0.04195733368396759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,128,0,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,4,128,0,1,fp8,fp8,0,0.03773866593837738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,128,0,1,float16,float16,0,0.042266666889190674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,128,0,1,float16,fp8,0,0.04182399809360504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,96,8,128,0,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,128,0,1,float16,float16,0,2.527232011159261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,128,0,1,float16,fp8,0,2.52511994043986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,1,128,0,1,fp8,fp8,0,2.313690662384033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,128,0,1,float16,float16,0,2.58515199025472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,128,0,1,float16,fp8,0,2.6601386070251465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,4,128,0,1,fp8,fp8,0,2.7308851877848306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,128,0,1,float16,float16,0,2.6926345825195312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,128,0,1,float16,fp8,0,2.687295913696289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,128,0,1,float16,float16,0,1.5499359766642253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,96,8,128,0,1,fp8,fp8,0,2.612101395924886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,128,0,1,float16,fp8,0,1.514527956644694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,96,128,0,1,fp8,fp8,0,1.5253547032674153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,128,0,1,float16,float16,0,1.273967981338501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,128,0,1,float16,fp8,0,1.2719786961873372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,1,128,0,1,fp8,fp8,0,1.1677066485087078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,128,0,1,float16,float16,0,1.2838613192240398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,128,0,1,float16,fp8,0,1.283882697423299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,4,128,0,1,fp8,fp8,0,1.3271839618682861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,128,0,1,float16,float16,0,1.2994720141092937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,128,0,1,float16,fp8,0,1.3076586723327637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,96,8,128,0,1,fp8,fp8,0,1.236682653427124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,128,0,1,float16,float16,0,0.7731093565622965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,128,0,1,float16,fp8,0,0.7611520290374756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,96,128,0,1,fp8,fp8,0,0.7660160064697266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,128,0,1,float16,float16,0,0.648037314414978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,128,0,1,float16,fp8,0,0.6463786760965983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,1,128,0,1,fp8,fp8,0,0.5921760002772013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,128,0,1,float16,float16,0,0.6523040135701498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,128,0,1,float16,fp8,0,0.6519466638565063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,4,128,0,1,fp8,fp8,0,0.6006826559702555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,128,0,1,float16,float16,0,0.660368005434672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,128,0,1,float16,fp8,0,0.65993599096934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,96,8,128,0,1,fp8,fp8,0,0.6146399974822998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,128,0,1,float16,float16,0,0.40085868040720624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,128,0,1,float16,fp8,0,0.3906773328781128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,96,128,0,1,fp8,fp8,0,0.3936426639556885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,128,0,1,float16,float16,0,0.3348319927851359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,128,0,1,float16,fp8,0,0.33590400218963623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,1,128,0,1,fp8,fp8,0,0.3047093351682027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,128,0,1,float16,float16,0,0.3380853335062663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,128,0,1,float16,fp8,0,0.33823466300964355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,4,128,0,1,fp8,fp8,0,0.30900800228118896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,128,0,1,float16,float16,0,0.3426986535390218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,128,0,1,float16,fp8,0,0.3433599869410197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,96,8,128,0,1,fp8,fp8,0,0.3166453242301941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,128,0,1,float16,float16,0,0.21647467215855917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,128,0,1,float16,fp8,0,0.2083466649055481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,96,128,0,1,fp8,fp8,0,0.21089067061742148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,128,0,1,float16,float16,0,0.17916800578435263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,128,0,1,float16,fp8,0,0.1800959904988607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,1,128,0,1,fp8,fp8,0,0.16236799955368042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,128,0,1,float16,float16,0,0.18157333135604858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,128,0,1,float16,fp8,0,0.1811466614405314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,4,128,0,1,fp8,fp8,0,0.16382400194803873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,128,0,1,float16,float16,0,0.18238933881123862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,128,0,1,float16,fp8,0,0.18210132916768393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,96,8,128,0,1,fp8,fp8,0,0.1677173376083374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,128,0,1,float16,float16,0,0.12112533052762349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,128,0,1,float16,fp8,0,0.1183733344078064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,96,128,0,1,fp8,fp8,0,0.11637333035469055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,128,0,1,float16,float16,0,0.09910933176676433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,128,0,1,float16,fp8,0,0.0981333355108897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,1,128,0,1,fp8,fp8,0,0.08922666311264038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,128,0,1,float16,float16,0,0.09884799520174663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,128,0,1,float16,fp8,0,0.09941333532333374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,4,128,0,1,fp8,fp8,0,0.08898133039474487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,128,0,1,float16,float16,0,0.10129599769910176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,128,0,1,float16,fp8,0,0.10058666268984477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,96,8,128,0,1,fp8,fp8,0,0.09241599837938945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,128,0,1,float16,float16,0,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,128,0,1,float16,fp8,0,0.06516799827416737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,96,128,0,1,fp8,fp8,0,0.07021866738796234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,128,0,1,float16,float16,0,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,128,0,1,float16,fp8,0,0.055829331278800964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,1,128,0,1,fp8,fp8,0,0.050986667474110924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,128,0,1,float16,float16,0,0.05602666735649109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,128,0,1,float16,fp8,0,0.058378666639328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,4,128,0,1,fp8,fp8,0,0.05152533451716105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,128,0,1,float16,float16,0,0.05797866483529409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,128,0,1,float16,fp8,0,0.05806933343410492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,128,0,1,float16,float16,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,96,8,128,0,1,fp8,fp8,0,0.051856001218159996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,128,0,1,float16,fp8,0,0.04163199911514918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,96,128,0,1,fp8,fp8,0,0.03925333420435587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,128,0,1,float16,float16,0,0.037690666814645134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,128,0,1,float16,fp8,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,1,128,0,1,fp8,fp8,0,0.03398400048414866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,128,0,1,float16,float16,0,0.03817066550254822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,128,0,1,float16,fp8,0,0.038165333370367684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,4,128,0,1,fp8,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,128,0,1,float16,float16,0,0.037861332297325134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,128,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,96,8,128,0,1,fp8,fp8,0,0.035599999129772186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,128,0,1,float16,float16,0,0.02703999976317088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,128,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,96,128,0,1,fp8,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,128,0,1,float16,float16,0,0.02630399912595749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,128,0,1,float16,fp8,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,1,128,0,1,fp8,fp8,0,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,128,0,1,float16,float16,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,4,128,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,128,0,1,float16,float16,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,128,0,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,96,8,128,0,1,fp8,fp8,0,0.025098666548728943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,128,0,1,float16,float16,0,1.3792053858439128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,128,0,1,float16,fp8,0,1.3780800501505535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,1,128,0,1,fp8,fp8,0,1.2745973269144695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,128,0,1,float16,float16,0,1.3934292793273926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,128,0,1,float16,fp8,0,1.3948319753011067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,4,128,0,1,fp8,fp8,0,1.450917402903239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,128,0,1,float16,float16,0,1.4174240430196126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,128,0,1,float16,fp8,0,1.415013313293457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,128,0,1,float16,float16,0,0.8286080360412598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,96,8,128,0,1,fp8,fp8,0,1.3900693257649739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,128,0,1,float16,fp8,0,0.8096746603647867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,96,128,0,1,fp8,fp8,0,0.8174560070037842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,128,0,1,float16,float16,0,0.6991253693898519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,128,0,1,float16,fp8,0,0.6962719758351644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,1,128,0,1,fp8,fp8,0,0.6464639902114868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,128,0,1,float16,float16,0,0.7044213612874349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,128,0,1,float16,fp8,0,0.7056427001953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,4,128,0,1,fp8,fp8,0,0.6514346599578857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,128,0,1,float16,float16,0,0.7117973168691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,128,0,1,float16,fp8,0,0.7155893643697103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,96,8,128,0,1,fp8,fp8,0,0.6648746728897095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,128,0,1,float16,float16,0,0.4222293297449748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,128,0,1,float16,fp8,0,0.4155413309733073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,96,128,0,1,fp8,fp8,0,0.41783467928568524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,128,0,1,float16,float16,0,0.35820265611012775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,128,0,1,float16,fp8,0,0.35797866185506183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,1,128,0,1,fp8,fp8,0,0.3295573393503825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,128,0,1,float16,float16,0,0.36025599638621014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,128,0,1,float16,fp8,0,0.36189866065979004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,4,128,0,1,fp8,fp8,0,0.33244800567626953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,128,0,1,float16,float16,0,0.365994652112325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,128,0,1,float16,fp8,0,0.36834665139516193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,96,8,128,0,1,fp8,fp8,0,0.3413013219833374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,128,0,1,float16,float16,0,0.22362667322158813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,128,0,1,float16,fp8,0,0.21810134251912436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,96,128,0,1,fp8,fp8,0,0.22011200586954752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,128,0,1,float16,float16,0,0.18879467248916626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,128,0,1,float16,fp8,0,0.18903466065724692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,1,128,0,1,fp8,fp8,0,0.17307200034459433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,128,0,1,float16,float16,0,0.1916853388150533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,128,0,1,float16,fp8,0,0.19107200702031454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,4,128,0,1,fp8,fp8,0,0.17538134256998697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,128,0,1,float16,float16,0,0.19293866554896036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,128,0,1,float16,fp8,0,0.19143466154734293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,96,8,128,0,1,fp8,fp8,0,0.18014933665593466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,128,0,1,float16,float16,0,0.12170132994651794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,128,0,1,float16,fp8,0,0.11963733037312825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,96,128,0,1,fp8,fp8,0,0.11962667107582092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,128,0,1,float16,float16,0,0.10162132978439331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,128,0,1,float16,fp8,0,0.102101335922877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,1,128,0,1,fp8,fp8,0,0.09296533465385437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,128,0,1,float16,fp8,0,0.10379733641942342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,128,0,1,float16,float16,0,0.10371733705202739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,4,128,0,1,fp8,fp8,0,0.09549333651860555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,128,0,1,float16,float16,0,0.10488532980283101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,128,0,1,float16,fp8,0,0.10443733135859172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,128,0,1,float16,float16,0,0.06982933481534322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,96,8,128,0,1,fp8,fp8,0,0.09898133079210918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,128,0,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,96,128,0,1,fp8,fp8,0,0.07318399846553802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,128,0,1,float16,float16,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,128,0,1,float16,fp8,0,0.06002133091290792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,1,128,0,1,fp8,fp8,0,0.053583999474843345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,128,0,1,float16,float16,0,0.05975466469923655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,128,0,1,float16,fp8,0,0.059690664211908974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,4,128,0,1,fp8,fp8,0,0.05440000196297964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,128,0,1,float16,float16,0,0.06004266440868378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,128,0,1,float16,fp8,0,0.060165335734685264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,96,8,128,0,1,fp8,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,128,0,1,float16,float16,0,0.03949866692225138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,128,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,96,128,0,1,fp8,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,128,0,1,float16,float16,0,0.037685332198937736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,128,0,1,float16,fp8,0,0.037530665596326195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,1,128,0,1,fp8,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,128,0,1,float16,float16,0,0.03745600084463755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,128,0,1,float16,fp8,0,0.03769599894682566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,4,128,0,1,fp8,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,128,0,1,float16,float16,0,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,128,0,1,float16,fp8,0,0.037605332831541695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,96,8,128,0,1,fp8,fp8,0,0.03543466577927271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,128,0,1,float16,float16,0,0.027301333844661713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,128,0,1,float16,fp8,0,0.02941333254178365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,96,128,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,128,0,1,float16,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,1,128,0,1,fp8,fp8,0,0.024512000381946564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,128,0,1,float16,fp8,0,0.02718399961789449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,4,128,0,1,fp8,fp8,0,0.02513066679239273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,128,0,1,float16,float16,0,0.025850666066010792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,128,0,1,float16,fp8,0,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,96,8,128,0,1,fp8,fp8,0,0.02496533344189326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,96,128,0,1,fp8,fp8,0,0.021327999730904896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,128,0,1,float16,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,128,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,1,128,0,1,fp8,fp8,0,0.019567999988794327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,4,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,128,0,1,float16,float16,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,128,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,96,8,128,0,1,fp8,fp8,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,128,0,1,float16,float16,0,0.8714026610056559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,128,0,1,float16,fp8,0,0.8717439969380697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,1,128,0,1,fp8,fp8,0,0.815333366394043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,128,0,1,float16,float16,0,0.8776213328043619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,128,0,1,float16,fp8,0,0.8825813134511312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,4,128,0,1,fp8,fp8,0,0.8378240267435709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,128,0,1,float16,float16,0,0.8849066893259684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,128,0,1,float16,fp8,0,0.8871040344238281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,128,0,1,float16,float16,0,0.5098933378855387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,96,8,128,0,1,fp8,fp8,0,0.8341759840647379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,128,0,1,float16,fp8,0,0.4996639887491862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,96,128,0,1,fp8,fp8,0,0.49886401494344074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,128,0,1,float16,float16,0,0.4439573287963867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,128,0,1,float16,fp8,0,0.44276265303293866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,1,128,0,1,fp8,fp8,0,0.4123733441034953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,128,0,1,float16,float16,0,0.44609065850575763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,128,0,1,float16,fp8,0,0.44572798411051434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,4,128,0,1,fp8,fp8,0,0.41786666711171466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,128,0,1,float16,float16,0,0.4514400164286296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,128,0,1,float16,fp8,0,0.45094935099283856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,96,8,128,0,1,fp8,fp8,0,0.4224693377812703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,128,0,1,float16,float16,0,0.2667466600735982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,128,0,1,float16,fp8,0,0.2605546712875366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,96,128,0,1,fp8,fp8,0,0.2595146695772807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,128,0,1,float16,float16,0,0.23017066717147827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,128,0,1,float16,fp8,0,0.23060266176859537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,1,128,0,1,fp8,fp8,0,0.21405333280563354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,128,0,1,float16,float16,0,0.2336906592051188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,128,0,1,float16,fp8,0,0.23279466231664023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,4,128,0,1,fp8,fp8,0,0.21644800901412964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,128,0,1,float16,float16,0,0.23408534129460654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,128,0,1,float16,fp8,0,0.23492799202601114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,96,8,128,0,1,fp8,fp8,0,0.2212053338686625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,128,0,1,float16,float16,0,0.14225600163141885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,128,0,1,float16,fp8,0,0.13978667060534158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,96,128,0,1,fp8,fp8,0,0.14043733477592468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,128,0,1,float16,float16,0,0.12380799651145935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,128,0,1,float16,fp8,0,0.12370666861534119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,1,128,0,1,fp8,fp8,0,0.11348266402880351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,128,0,1,float16,float16,0,0.12485866745313008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,128,0,1,float16,fp8,0,0.12553600470225015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,4,128,0,1,fp8,fp8,0,0.11588799953460693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,128,0,1,float16,float16,0,0.12599466244379678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,128,0,1,float16,fp8,0,0.12546666463216147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,96,8,128,0,1,fp8,fp8,0,0.11819733182589214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,128,0,1,float16,float16,0,0.08092266817887624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,128,0,1,float16,fp8,0,0.07891733447710673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,96,128,0,1,fp8,fp8,0,0.08214933176835378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,128,0,1,float16,float16,0,0.07179200152556102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,128,0,1,float16,fp8,0,0.06957866748174031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,128,0,1,float16,float16,0,0.07021333277225494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,1,128,0,1,fp8,fp8,0,0.06362666686375935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,128,0,1,float16,fp8,0,0.06989866495132446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,4,128,0,1,fp8,fp8,0,0.06356266637643178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,128,0,1,float16,float16,0,0.07088000078996022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,128,0,1,float16,fp8,0,0.07109866539637248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,96,8,128,0,1,fp8,fp8,0,0.06413333117961884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,128,0,1,float16,float16,0,0.045978665351867676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,128,0,1,float16,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,96,128,0,1,fp8,fp8,0,0.0440586656332016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,128,0,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,128,0,1,float16,fp8,0,0.04358399907747904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,1,128,0,1,fp8,fp8,0,0.03974399964014689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,128,0,1,float16,float16,0,0.04326933125654856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,128,0,1,float16,fp8,0,0.043882668018341064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,4,128,0,1,fp8,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,128,0,1,float16,float16,0,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,128,0,1,float16,fp8,0,0.043285335103670754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,96,8,128,0,1,fp8,fp8,0,0.03965333352486292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,128,0,1,float16,fp8,0,0.029930666089057922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,96,128,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,128,0,1,float16,float16,0,0.028597332537174225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,128,0,1,float16,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,1,128,0,1,fp8,fp8,0,0.025829332570234936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,128,0,1,float16,float16,0,0.027893332143624622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,128,0,1,float16,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,4,128,0,1,fp8,fp8,0,0.02722666660944621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,128,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,96,8,128,0,1,fp8,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,128,0,1,float16,float16,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,128,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,96,128,0,1,fp8,fp8,0,0.021962667504946392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,128,0,1,float16,fp8,0,0.022431999444961548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,1,128,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,128,0,1,float16,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,4,128,0,1,fp8,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,128,0,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,128,0,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,96,8,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,128,0,1,float16,fp8,0,0.018165333817402523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,96,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,128,0,1,float16,float16,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,1,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,4,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,96,8,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,128,0,1,float16,float16,0,0.6269599994023641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,128,0,1,float16,fp8,0,0.6272373199462891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,1,128,0,1,fp8,fp8,0,0.59224534034729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,128,0,1,float16,float16,0,0.6291306813557943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,128,0,1,float16,fp8,0,0.6308799982070923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,4,128,0,1,fp8,fp8,0,0.596725344657898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,128,0,1,float16,float16,0,0.6314560174942017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,128,0,1,float16,fp8,0,0.6316373348236084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,128,0,1,float16,float16,0,0.3544693390528361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,96,8,128,0,1,fp8,fp8,0,0.6020693381627401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,128,0,1,float16,fp8,0,0.3490080038706462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,96,128,0,1,fp8,fp8,0,0.3491679827372233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,128,0,1,float16,float16,0,0.3217173417409261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,128,0,1,float16,fp8,0,0.3227199912071228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,1,128,0,1,fp8,fp8,0,0.3033546606699626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,128,0,1,float16,float16,0,0.3227733373641968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,128,0,1,float16,fp8,0,0.32313066720962524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,4,128,0,1,fp8,fp8,0,0.3046613335609436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,128,0,1,float16,float16,0,0.3237226605415344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,128,0,1,float16,fp8,0,0.323472003142039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,96,8,128,0,1,fp8,fp8,0,0.30851199229558307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,128,0,1,float16,float16,0,0.18523200352986655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,128,0,1,float16,fp8,0,0.18333333730697632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,96,128,0,1,fp8,fp8,0,0.1834026575088501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,128,0,1,float16,float16,0,0.16942399740219116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,128,0,1,float16,fp8,0,0.169322669506073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,1,128,0,1,fp8,fp8,0,0.15799466768900552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,128,0,1,float16,float16,0,0.16945600509643555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,128,0,1,float16,fp8,0,0.1705440084139506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,4,128,0,1,fp8,fp8,0,0.15959999958674112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,128,0,1,float16,float16,0,0.17097065846125284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,128,0,1,float16,fp8,0,0.16932799418767294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,96,8,128,0,1,fp8,fp8,0,0.16164799531300864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,128,0,1,float16,float16,0,0.10168000062306722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,128,0,1,float16,fp8,0,0.10079999764760335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,96,128,0,1,fp8,fp8,0,0.10281599561373393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,128,0,1,float16,float16,0,0.09180266658465068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,128,0,1,float16,fp8,0,0.0922879974047343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,128,0,1,float16,float16,0,0.09174399574597676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,1,128,0,1,fp8,fp8,0,0.0844533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,128,0,1,float16,fp8,0,0.09309333562850952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,4,128,0,1,fp8,fp8,0,0.08532266815503438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,128,0,1,float16,float16,0,0.09283733367919922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,128,0,1,float16,fp8,0,0.09363733728726704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,96,8,128,0,1,fp8,fp8,0,0.08499200145403545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,128,0,1,float16,float16,0,0.05682666599750519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,128,0,1,float16,fp8,0,0.05606933434804281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,128,0,1,float16,float16,0,0.05442666510740916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,96,128,0,1,fp8,fp8,0,0.05495466788609823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,128,0,1,float16,fp8,0,0.054005334774653115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,1,128,0,1,fp8,fp8,0,0.05041066805521647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,128,0,1,float16,float16,0,0.05444266895453135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,128,0,1,float16,fp8,0,0.054431999723116554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,4,128,0,1,fp8,fp8,0,0.051141331593195595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,128,0,1,float16,float16,0,0.05426666637261709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,128,0,1,float16,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,96,8,128,0,1,fp8,fp8,0,0.05036266644795736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,128,0,1,float16,float16,0,0.03515200068553289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,128,0,1,float16,fp8,0,0.03588266670703888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,96,128,0,1,fp8,fp8,0,0.03368533402681351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,128,0,1,float16,float16,0,0.0353973334034284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,128,0,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,1,128,0,1,fp8,fp8,0,0.03376533339420954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,128,0,1,float16,float16,0,0.03512533257404963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,128,0,1,float16,fp8,0,0.03375466664632162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,4,128,0,1,fp8,fp8,0,0.03382399926582972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,128,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,128,0,1,float16,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,96,8,128,0,1,fp8,fp8,0,0.0335413341720899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,128,0,1,float16,float16,0,0.025306666890780132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,96,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,128,0,1,float16,fp8,0,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,1,128,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,128,0,1,float16,float16,0,0.023792001108328503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,4,128,0,1,fp8,fp8,0,0.02401600033044815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,96,8,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,128,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,128,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,96,128,0,1,fp8,fp8,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,128,0,1,float16,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,1,128,0,1,fp8,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,128,0,1,float16,float16,0,0.020501332978407543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,128,0,1,float16,fp8,0,0.02070933332045873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,4,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,128,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,128,0,1,float16,fp8,0,0.019493332753578823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,96,8,128,0,1,fp8,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,96,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,128,0,1,float16,float16,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,1,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,4,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,128,0,1,float16,float16,0,0.015909332782030106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,96,8,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,128,0,1,float16,float16,0,0.5097866853078207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,128,0,1,float16,fp8,0,0.5087093512217203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,1,128,0,1,fp8,fp8,0,0.48446400960286456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,128,0,1,float16,float16,0,0.5111146767934164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,128,0,1,float16,fp8,0,0.509391983350118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,4,128,0,1,fp8,fp8,0,0.48815464973449707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,128,0,1,float16,float16,0,0.5116906563440958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,128,0,1,float16,fp8,0,0.5115253527959188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,128,0,1,float16,float16,0,0.2791200081507365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,96,8,128,0,1,fp8,fp8,0,0.4902506669362386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,128,0,1,float16,fp8,0,0.27684799830118817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,96,128,0,1,fp8,fp8,0,0.2757333318392436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,128,0,1,float16,float16,0,0.2620426615079244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,128,0,1,float16,fp8,0,0.26285332441329956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,1,128,0,1,fp8,fp8,0,0.24849067131678262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,128,0,1,float16,float16,0,0.2619626720746358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,128,0,1,float16,fp8,0,0.2630293369293213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,4,128,0,1,fp8,fp8,0,0.25040533145268756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,128,0,1,float16,float16,0,0.26366400718688965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,128,0,1,float16,fp8,0,0.2639039953549703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,96,8,128,0,1,fp8,fp8,0,0.2548266649246216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,128,0,1,float16,float16,0,0.1478559970855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,128,0,1,float16,fp8,0,0.1463093360265096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,96,128,0,1,fp8,fp8,0,0.1482080022493998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,128,0,1,float16,float16,0,0.13896532853444418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,128,0,1,float16,fp8,0,0.13860799868901572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,1,128,0,1,fp8,fp8,0,0.12991467118263245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,128,0,1,float16,float16,0,0.1381439963976542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,128,0,1,float16,fp8,0,0.13807466626167297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,4,128,0,1,fp8,fp8,0,0.13010666767756143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,128,0,1,float16,float16,0,0.13958932956059775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,128,0,1,float16,fp8,0,0.13983466227849325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,128,0,1,float16,float16,0,0.07984533409277599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,96,8,128,0,1,fp8,fp8,0,0.13104533155759177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,128,0,1,float16,fp8,0,0.08053866525491078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,96,128,0,1,fp8,fp8,0,0.07726933558781941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,128,0,1,float16,float16,0,0.0765226682027181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,128,0,1,float16,fp8,0,0.07814399898052216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,1,128,0,1,fp8,fp8,0,0.07252266506354015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,128,0,1,float16,float16,0,0.07729599873224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,128,0,1,float16,fp8,0,0.07699200014273326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,4,128,0,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,128,0,1,float16,float16,0,0.07702399790287018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,128,0,1,float16,fp8,0,0.07660266757011414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,96,8,128,0,1,fp8,fp8,0,0.07340266803900401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,128,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,128,0,1,float16,fp8,0,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,96,128,0,1,fp8,fp8,0,0.04599999884764353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,128,0,1,float16,float16,0,0.04577599962552389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,128,0,1,float16,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,1,128,0,1,fp8,fp8,0,0.043391997615496315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,128,0,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,128,0,1,float16,fp8,0,0.04628799855709076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,4,128,0,1,fp8,fp8,0,0.043866669138272606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,128,0,1,float16,float16,0,0.04619733492533366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,128,0,1,float16,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,96,8,128,0,1,fp8,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,128,0,1,float16,float16,0,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,128,0,1,float16,fp8,0,0.03160000095764796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,96,128,0,1,fp8,fp8,0,0.031152000029881794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,128,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,128,0,1,float16,fp8,0,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,1,128,0,1,fp8,fp8,0,0.02920000006755193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,128,0,1,float16,float16,0,0.029306667546431225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,128,0,1,float16,fp8,0,0.03047466774781545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,4,128,0,1,fp8,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,128,0,1,float16,float16,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,128,0,1,float16,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,96,8,128,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,128,0,1,float16,float16,0,0.02160533269246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,96,128,0,1,fp8,fp8,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,128,0,1,float16,float16,0,0.021957332889238994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,128,0,1,float16,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,1,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,128,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,4,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,128,0,1,float16,float16,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,128,0,1,float16,fp8,0,0.02189333240191142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,96,8,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,96,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,128,0,1,float16,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,1,128,0,1,fp8,fp8,0,0.017717332889636356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,128,0,1,float16,float16,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,4,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,128,0,1,float16,float16,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,128,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,96,8,128,0,1,fp8,fp8,0,0.018922666708628338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,128,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,96,128,0,1,fp8,fp8,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,1,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,4,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,96,8,128,0,1,float16,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,0,0.4287573496500651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,0,0.42749865849812824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,1,128,0,1,fp8,fp8,0,0.3892693519592285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,0,0.4270879824956258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,0,0.4273173411687215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,4,128,0,1,fp8,fp8,0,0.39033599694569904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,0,0.4285386800765991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,0,0.42899731794993085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,96,8,128,0,1,fp8,fp8,0,0.38942933082580566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,128,0,1,float16,float16,0,0.2220319906870524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,128,0,1,float16,fp8,0,0.222271998723348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,96,128,0,1,fp8,fp8,0,0.2037280003229777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,0,0.2214453419049581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,0,0.22035199403762817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,1,128,0,1,fp8,fp8,0,0.2015626629193624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,0,0.21990400552749634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,0,0.22019733985265097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,4,128,0,1,fp8,fp8,0,0.20172266165415445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,0,0.22167466084162393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,0,0.22009599208831787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,96,8,128,0,1,fp8,fp8,0,0.20164267222086588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,128,0,1,float16,float16,0,0.11788800358772278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,128,0,1,float16,fp8,0,0.11769066254297893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,96,128,0,1,fp8,fp8,0,0.10863999525705974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,0,0.11764267086982727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,0,0.11752532919247945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,1,128,0,1,fp8,fp8,0,0.1076746682325999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,0,0.1181653340657552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,128,0,1,fp8,fp8,0,0.1074133316675822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,0,0.12026666601498921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,0,0.11731732885042827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,0,0.1184266706307729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,96,8,128,0,1,fp8,fp8,0,0.10774399836858113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,128,0,1,float16,float16,0,0.06650133430957794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,128,0,1,float16,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,96,128,0,1,fp8,fp8,0,0.06211733321348826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,0,0.06632533172766368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,128,0,1,fp8,fp8,0,0.06206933160622915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,0,0.06874133149782817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,0,0.06719466547171275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,4,128,0,1,fp8,fp8,0,0.06020266811052958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,0,0.06985066831111908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,0,0.07453866799672444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,96,8,128,0,1,fp8,fp8,0,0.06177066763242086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,128,0,1,float16,float16,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,128,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,96,128,0,1,fp8,fp8,0,0.03740799923737844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,0,0.04164800047874451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,1,128,0,1,fp8,fp8,0,0.037765334049860634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,0,0.0429013321797053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,0,0.04186666508515676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,4,128,0,1,fp8,fp8,0,0.03930133332808813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,0,0.041509332756201424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,96,8,128,0,1,fp8,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,128,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,128,0,1,float16,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,96,128,0,1,fp8,fp8,0,0.028730665644009907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,1,128,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,4,128,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,0,0.02943466603755951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,96,8,128,0,1,fp8,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,96,128,0,1,fp8,fp8,0,0.020469332734743755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,1,128,0,1,fp8,fp8,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,4,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,96,8,128,0,1,fp8,fp8,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,128,0,1,float16,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,96,128,0,1,fp8,fp8,0,0.01878400022784869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,4,128,0,1,fp8,fp8,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,96,8,128,0,1,fp8,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,96,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,1,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,4,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,96,8,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,fp8,0,22.734059651692707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,128,0,1,float16,float16,0,22.89466094970703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,1,128,0,1,fp8,fp8,0,17.659408569335938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,float16,0,23.364603678385418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,128,0,1,fp8,fp8,0,17.545035044352215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,2,128,0,1,float16,fp8,0,23.770421346028645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,128,0,1,fp8,fp8,0,17.766143798828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,float16,0,23.18615468343099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,4,128,0,1,float16,fp8,0,23.708694458007812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,128,0,1,fp8,fp8,0,17.65124257405599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,float16,0,24.0504633585612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,64,8,128,0,1,float16,fp8,0,22.86707305908203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,float16,0,12.666000366210938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,128,0,1,float16,fp8,0,12.610949198404947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,64,128,0,1,fp8,fp8,0,9.25652821858724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,float16,0,11.8535525004069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,128,0,1,float16,fp8,0,12.131360371907553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,float16,0,11.412757873535156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,1,128,0,1,fp8,fp8,0,8.84825070699056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,128,0,1,float16,fp8,0,11.911125183105469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,2,128,0,1,fp8,fp8,0,8.939178466796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,float16,0,12.121898651123047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,128,0,1,fp8,fp8,0,8.899322509765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,float16,0,11.946800231933594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,128,0,1,fp8,fp8,0,8.957232157389322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,4,128,0,1,float16,fp8,0,11.773578643798828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,float16,0,6.307674407958984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,64,8,128,0,1,float16,fp8,0,11.60861841837565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,128,0,1,fp8,fp8,0,4.830202738444011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,64,128,0,1,float16,fp8,0,6.178426742553711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,float16,0,6.070165634155273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,128,0,1,float16,fp8,0,5.740863800048828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,1,128,0,1,fp8,fp8,0,4.613504091898601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,float16,0,6.1077117919921875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,128,0,1,float16,fp8,0,6.148495992024739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,2,128,0,1,fp8,fp8,0,4.607621192932129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,128,0,1,fp8,fp8,0,4.643717447916667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,fp8,0,5.929914474487305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,float16,0,5.855829238891602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,4,128,0,1,float16,float16,0,5.845370610555013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,float16,0,3.1120640436808267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,128,0,1,fp8,fp8,0,4.63698132832845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,128,0,1,float16,fp8,0,3.167941411336263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,64,8,128,0,1,float16,fp8,0,6.130704243977864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,64,128,0,1,fp8,fp8,0,2.6027414004007974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,float16,0,2.9893547693888345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,128,0,1,fp8,fp8,0,2.509328047434489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,1,128,0,1,float16,fp8,0,3.0304320653279624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,float16,0,2.9930187861124673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,128,0,1,fp8,fp8,0,2.5092639923095703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,2,128,0,1,float16,fp8,0,3.027279853820801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,float16,0,3.056117375691732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,128,0,1,float16,fp8,0,3.0069920221964517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,4,128,0,1,fp8,fp8,0,2.5142079989115396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,float16,0,3.031221389770508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,128,0,1,float16,fp8,0,3.046245257059733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,64,8,128,0,1,fp8,fp8,0,2.513274669647217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,128,0,1,fp8,fp8,0,10.429349263509115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,float16,0,13.583428700764975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,1,128,0,1,float16,fp8,0,13.620037078857422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,float16,0,13.698570251464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,128,0,1,fp8,fp8,0,10.851812998453775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,2,128,0,1,float16,fp8,0,14.237536112467447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,float16,0,13.753551483154297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,128,0,1,float16,fp8,0,14.078272501627604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,4,128,0,1,fp8,fp8,0,10.673712412516275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,float16,0,13.966837565104166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,128,0,1,fp8,fp8,0,10.563781102498373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,64,8,128,0,1,float16,fp8,0,14.021748860677084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,float16,0,7.414634704589844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,128,0,1,fp8,fp8,0,5.698303858439128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,64,128,0,1,float16,fp8,0,7.371530532836914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,float16,0,6.579738616943359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,128,0,1,float16,fp8,0,7.184917449951172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,1,128,0,1,fp8,fp8,0,5.3356583913167315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,float16,0,6.8279571533203125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,128,0,1,fp8,fp8,0,5.42578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,2,128,0,1,float16,fp8,0,6.7074400583903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,float16,0,6.950933456420898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,128,0,1,fp8,fp8,0,5.370847702026367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,4,128,0,1,float16,fp8,0,7.196677525838216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,float16,0,6.938831965128581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,float16,0,3.761845270792643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,128,0,1,fp8,fp8,0,5.444735844930013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,128,0,1,float16,fp8,0,3.759354591369629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,64,8,128,0,1,float16,fp8,0,6.937349319458008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,64,128,0,1,fp8,fp8,0,2.9862934748331704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,float16,0,3.450528144836426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,128,0,1,fp8,fp8,0,2.81987730662028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,1,128,0,1,float16,fp8,0,3.386752128601074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,float16,0,3.398634592692057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,128,0,1,fp8,fp8,0,2.822688102722168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,2,128,0,1,float16,fp8,0,3.4981279373168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,float16,0,3.4111948013305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,128,0,1,float16,fp8,0,3.4479198455810547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,4,128,0,1,fp8,fp8,0,2.8291145960489907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,float16,0,3.424746513366699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,128,0,1,float16,fp8,0,3.421349207560221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,float16,0,1.9128692944844563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,64,8,128,0,1,fp8,fp8,0,2.8496532440185547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,128,0,1,float16,fp8,0,1.9774667421976726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,64,128,0,1,fp8,fp8,0,1.7809173266092937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,float16,0,1.799605369567871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,128,0,1,float16,fp8,0,1.8343520164489746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,1,128,0,1,fp8,fp8,0,1.5631200472513835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,float16,0,1.8308374087015789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,128,0,1,fp8,fp8,0,1.5693386395772297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,2,128,0,1,float16,fp8,0,1.8722453117370605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,float16,0,1.8384480476379395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,128,0,1,fp8,fp8,0,1.6430880228678386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,4,128,0,1,float16,fp8,0,1.8392799695332844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,float16,0,1.8773706754048665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,128,0,1,fp8,fp8,0,1.5702932675679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,64,8,128,0,1,float16,fp8,0,1.8517227172851562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,128,0,1,fp8,fp8,0,7.597221374511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,float16,0,9.628719965616861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,1,128,0,1,float16,fp8,0,9.782133102416992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,float16,0,9.666757583618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,128,0,1,fp8,fp8,0,7.618581136067708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,2,128,0,1,float16,fp8,0,9.880330403645834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,float16,0,9.948538462320963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,128,0,1,float16,fp8,0,9.767333348592123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,4,128,0,1,fp8,fp8,0,7.745669047037761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,float16,0,10.087055842081705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,128,0,1,fp8,fp8,0,7.719189325968425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,64,8,128,0,1,float16,fp8,0,10.160133361816406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,float16,0,5.236240069071452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,128,0,1,fp8,fp8,0,4.182415962219238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,64,128,0,1,float16,fp8,0,5.340576171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,float16,0,4.856842676798503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,128,0,1,float16,fp8,0,4.832223892211914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,1,128,0,1,fp8,fp8,0,3.9021120071411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,float16,0,4.879973411560059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,128,0,1,fp8,fp8,0,3.92087459564209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,2,128,0,1,float16,fp8,0,4.956063906351726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,float16,0,4.869386672973633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,128,0,1,fp8,fp8,0,3.9402186075846353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,4,128,0,1,float16,fp8,0,5.150704065958659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,float16,0,5.0508371988932295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,128,0,1,float16,fp8,0,4.84387747446696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,float16,0,2.7918132146199546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,64,8,128,0,1,fp8,fp8,0,3.940922737121582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,128,0,1,float16,fp8,0,2.7142454783121743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,64,128,0,1,fp8,fp8,0,2.389557361602783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,float16,0,2.496901353200277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,128,0,1,float16,fp8,0,2.4635465939839682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,1,128,0,1,fp8,fp8,0,2.0845600763956704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,float16,0,2.440762678782145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,128,0,1,float16,fp8,0,2.5113706588745117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,2,128,0,1,fp8,fp8,0,2.096405347188314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,float16,0,2.5011253356933594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,128,0,1,fp8,fp8,0,2.095834732055664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,4,128,0,1,float16,fp8,0,2.4887253443400064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,float16,0,2.502064069112142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,128,0,1,float16,fp8,0,2.530954678853353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,float16,0,1.501039981842041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,128,0,1,float16,fp8,0,1.4949439366658528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,64,8,128,0,1,fp8,fp8,0,2.1039253870646157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,64,128,0,1,fp8,fp8,0,1.2305813630421956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,float16,0,1.345813274383545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,128,0,1,fp8,fp8,0,1.17686931292216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,1,128,0,1,float16,fp8,0,1.3474079767862956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,float16,0,1.3530880610148113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,128,0,1,float16,fp8,0,1.356592019399007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,2,128,0,1,fp8,fp8,0,1.1776213645935059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,float16,0,1.3771093686421711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,128,0,1,float16,fp8,0,1.3652159372965496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,4,128,0,1,fp8,fp8,0,1.2091519832611084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,float16,0,1.3702932993570964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,128,0,1,float16,fp8,0,1.3639307022094727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,64,8,128,0,1,fp8,fp8,0,1.2188959916432698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,128,0,1,fp8,fp8,0,10.325338363647461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,float16,0,12.81603749593099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,1,128,0,1,float16,fp8,0,13.291051228841146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,float16,0,12.979546864827475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,128,0,1,fp8,fp8,0,10.441904067993164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,2,128,0,1,float16,fp8,0,12.880442301432291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,float16,0,13.369402567545572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,128,0,1,float16,fp8,0,13.932042439778646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,4,128,0,1,fp8,fp8,0,10.428069432576498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,float16,0,13.724352518717447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,128,0,1,fp8,fp8,0,10.469589233398438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,64,8,128,0,1,float16,fp8,0,13.445621490478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,float16,0,7.1758988698323565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,128,0,1,fp8,fp8,0,5.661904017130534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,64,128,0,1,float16,fp8,0,7.028746922810872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,float16,0,6.587146759033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,128,0,1,float16,fp8,0,6.685136159261067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,1,128,0,1,fp8,fp8,0,5.206735928853353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,float16,0,6.525626500447591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,128,0,1,fp8,fp8,0,5.239040056864421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,2,128,0,1,float16,fp8,0,6.809232076009114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,float16,0,6.886341094970703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,128,0,1,float16,fp8,0,6.561360041300456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,4,128,0,1,fp8,fp8,0,5.228672027587891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,float16,0,6.562837600708008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,128,0,1,float16,fp8,0,6.669712066650391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,float16,0,3.524314562479655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,64,8,128,0,1,fp8,fp8,0,5.264730771382649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,128,0,1,float16,fp8,0,3.7237332661946616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,64,128,0,1,fp8,fp8,0,2.929658571879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,float16,0,3.1900478998819985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,128,0,1,float16,fp8,0,3.228549321492513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,1,128,0,1,fp8,fp8,0,2.6954774856567383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,float16,0,3.2708959579467773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,128,0,1,float16,fp8,0,3.2498133977254233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,2,128,0,1,fp8,fp8,0,2.699712117513021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,float16,0,3.236250559488932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,128,0,1,float16,fp8,0,3.354341189066569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,4,128,0,1,fp8,fp8,0,2.7129812240600586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,float16,0,3.327967961629232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,128,0,1,fp8,fp8,0,2.730682690938314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,64,8,128,0,1,float16,fp8,0,3.3329760233561196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,float16,0,1.8489386240641277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,128,0,1,fp8,fp8,0,1.5719520250956218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,64,128,0,1,float16,fp8,0,1.8866772651672363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,float16,0,1.7448639869689941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,128,0,1,float16,fp8,0,1.689578692118327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,1,128,0,1,fp8,fp8,0,1.4571146965026855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,float16,0,1.689568042755127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,128,0,1,float16,fp8,0,1.7235040664672852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,2,128,0,1,fp8,fp8,0,1.4571733474731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,float16,0,1.7115039825439453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,128,0,1,fp8,fp8,0,1.4611093203226726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,4,128,0,1,float16,fp8,0,1.7097973823547363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,float16,0,1.7086505889892578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,float16,0,1.0256000359853108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,128,0,1,float16,fp8,0,1.7524959246317546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,64,8,128,0,1,fp8,fp8,0,1.4668159484863281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,128,0,1,float16,fp8,0,1.0427359739939372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,64,128,0,1,fp8,fp8,0,0.8903199831644694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,float16,0,0.9699467023213705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,128,0,1,fp8,fp8,0,0.8356106281280518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,1,128,0,1,float16,fp8,0,0.9382239977518717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,float16,0,0.9526560306549072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,128,0,1,fp8,fp8,0,0.8361013730367025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,2,128,0,1,float16,fp8,0,0.9564266999562582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,float16,0,0.9467626412709554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,128,0,1,float16,fp8,0,0.9581226507822672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,4,128,0,1,fp8,fp8,0,0.8381280104319254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,float16,0,0.9643786748250326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,128,0,1,float16,fp8,0,0.9547946453094482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,64,8,128,0,1,fp8,fp8,0,0.8398026625315348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,128,0,1,fp8,fp8,0,6.396607716878255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,float16,0,7.709056218465169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,1,128,0,1,float16,fp8,0,7.704778671264648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,float16,0,7.797034581502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,128,0,1,fp8,fp8,0,6.419930775960286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,2,128,0,1,float16,fp8,0,7.680853525797526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,float16,0,7.749104181925456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,128,0,1,float16,fp8,0,7.798986434936523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,4,128,0,1,fp8,fp8,0,6.439477284749349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,float16,0,7.848122914632161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,128,0,1,fp8,fp8,0,6.482410430908203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,64,8,128,0,1,float16,fp8,0,8.194154739379883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,float16,0,4.341381390889485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,128,0,1,float16,fp8,0,4.422858556111653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,64,128,0,1,fp8,fp8,0,3.616959889729818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,float16,0,3.898522694905599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,128,0,1,float16,fp8,0,3.8638346989949546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,1,128,0,1,fp8,fp8,0,3.2516905466715493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,float16,0,3.8980372746785483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,128,0,1,float16,fp8,0,3.942506790161133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,2,128,0,1,fp8,fp8,0,3.2596426010131836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,float16,0,3.904143969217936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,128,0,1,float16,fp8,0,3.9256213506062827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,4,128,0,1,fp8,fp8,0,3.2706292470296225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,float16,0,3.9282401402791343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,float16,0,2.214458624521891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,128,0,1,float16,fp8,0,3.9617493947347007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,64,8,128,0,1,fp8,fp8,0,3.2897653579711914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,128,0,1,float16,fp8,0,2.261712074279785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,64,128,0,1,fp8,fp8,0,1.9488372802734375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,float16,0,2.0324533780415854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,128,0,1,float16,fp8,0,1.9756266276041667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,1,128,0,1,fp8,fp8,0,1.70524263381958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,float16,0,1.9861067136128743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,128,0,1,float16,fp8,0,1.9753066698710124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,2,128,0,1,fp8,fp8,0,1.700069268544515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,float16,0,1.9982293446858723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,128,0,1,float16,fp8,0,2.028127988179525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,4,128,0,1,fp8,fp8,0,1.7106186548868816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,float16,0,2.0187999407450357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,128,0,1,float16,fp8,0,2.0435253779093423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,float16,0,1.1802826722462971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,64,8,128,0,1,fp8,fp8,0,1.7505547205607097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,128,0,1,float16,fp8,0,1.2100266615549724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,64,128,0,1,fp8,fp8,0,1.0844586690266926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,float16,0,1.0567413171132405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,128,0,1,fp8,fp8,0,0.9368213017781576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,1,128,0,1,float16,fp8,0,1.0723360379536946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,float16,0,1.0700159867604573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,128,0,1,float16,fp8,0,1.0697706540425618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,2,128,0,1,fp8,fp8,0,0.9364480177561442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,float16,0,1.0755840142567952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,128,0,1,float16,fp8,0,1.0816853046417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,4,128,0,1,fp8,fp8,0,0.9381546974182129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,float16,0,1.0797706445058186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,128,0,1,float16,fp8,0,1.0878667036692302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,64,8,128,0,1,fp8,fp8,0,0.9455786546071371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,float16,0,0.6713279883066813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,128,0,1,float16,fp8,0,0.682703971862793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,64,128,0,1,fp8,fp8,0,0.5936160087585449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,float16,0,0.6124586661656698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,128,0,1,float16,fp8,0,0.6084373394648234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,1,128,0,1,fp8,fp8,0,0.551578680674235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,float16,0,0.6163040002187093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,128,0,1,float16,fp8,0,0.6127466758092245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,2,128,0,1,fp8,fp8,0,0.5507359902064005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,float16,0,0.6207679907480875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,128,0,1,float16,fp8,0,0.617792010307312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,4,128,0,1,fp8,fp8,0,0.552570660909017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,float16,0,0.6227786540985107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,128,0,1,float16,fp8,0,0.6222879886627197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,64,8,128,0,1,fp8,fp8,0,0.5547039906183878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,128,0,1,fp8,fp8,0,6.748661041259766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,float16,0,7.924330393473308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,1,128,0,1,float16,fp8,0,7.987552007039388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,float16,0,8.011685053507486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,128,0,1,fp8,fp8,0,6.781221389770508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,2,128,0,1,float16,fp8,0,7.9180959065755205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,float16,0,8.020458857218424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,128,0,1,float16,fp8,0,7.996320088704427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,4,128,0,1,fp8,fp8,0,6.778826395670573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,float16,0,8.096549352010092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,128,0,1,float16,fp8,0,8.159861246744791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,float16,0,4.566255887349446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,64,8,128,0,1,fp8,fp8,0,6.847178777058919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,128,0,1,float16,fp8,0,4.589610735575358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,64,128,0,1,fp8,fp8,0,3.8580748240152993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,float16,0,3.902421315511068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,128,0,1,fp8,fp8,0,3.371136029561361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,1,128,0,1,float16,fp8,0,3.9731038411458335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,float16,0,3.908005396525065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,128,0,1,float16,fp8,0,3.9859307607014975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,2,128,0,1,fp8,fp8,0,3.3767573038736978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,float16,0,4.012186686197917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,128,0,1,float16,fp8,0,3.975856145222982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,4,128,0,1,fp8,fp8,0,3.3923412958780923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,float16,0,4.038912137349446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,128,0,1,float16,fp8,0,4.092063903808594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,64,8,128,0,1,fp8,fp8,0,3.4220107396443686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,float16,0,2.2943785985310874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,128,0,1,float16,fp8,0,2.349621295928955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,64,128,0,1,fp8,fp8,0,2.0194880167643228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,float16,0,1.9892640113830566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,128,0,1,float16,fp8,0,1.9930987358093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,1,128,0,1,fp8,fp8,0,1.7394827206929524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,float16,0,2.0091840426127114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,128,0,1,float16,fp8,0,2.0075999895731607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,2,128,0,1,fp8,fp8,0,1.7403573989868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,float16,0,2.0096373558044434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,128,0,1,float16,fp8,0,2.0537919998168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,4,128,0,1,fp8,fp8,0,1.8417545954386394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,float16,0,2.0225119590759277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,128,0,1,float16,fp8,0,2.053285280863444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,float16,0,1.2083040078481038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,128,0,1,float16,fp8,0,1.2402933438618977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,64,8,128,0,1,fp8,fp8,0,1.838714599609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,64,128,0,1,fp8,fp8,0,1.0476426283518474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,float16,0,1.0491680304209392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,128,0,1,float16,fp8,0,1.0458347002665203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,1,128,0,1,fp8,fp8,0,0.9258986314137777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,float16,0,1.0534559885660808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,128,0,1,float16,fp8,0,1.0600533485412598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,2,128,0,1,fp8,fp8,0,0.9252266883850098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,float16,0,1.0638079643249512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,128,0,1,fp8,fp8,0,0.9333120187123617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,4,128,0,1,float16,fp8,0,1.0679466724395752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,float16,0,1.0633172988891602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,128,0,1,float16,fp8,0,1.0777119795481365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,float16,0,0.6608959833780924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,64,8,128,0,1,fp8,fp8,0,0.9385386308034261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,128,0,1,float16,fp8,0,0.6662506659825643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,64,128,0,1,fp8,fp8,0,0.5830880006154379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,float16,0,0.5789706707000732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,128,0,1,float16,fp8,0,0.582858681678772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,1,128,0,1,fp8,fp8,0,0.5218026638031006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,float16,0,0.583845337231954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,128,0,1,float16,fp8,0,0.5857439835866293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,2,128,0,1,fp8,fp8,0,0.5229653517405192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,float16,0,0.5840799808502197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,128,0,1,float16,fp8,0,0.5909813245137533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,4,128,0,1,fp8,fp8,0,0.5239520072937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,float16,0,0.5933386484781901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,128,0,1,float16,fp8,0,0.5928159952163696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,64,8,128,0,1,fp8,fp8,0,0.5280746618906657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,float16,0,0.38645867506663006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,128,0,1,float16,fp8,0,0.394048015276591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,64,128,0,1,fp8,fp8,0,0.3470880190531413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,float16,0,0.34308799107869464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,128,0,1,float16,fp8,0,0.3429866631825765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,1,128,0,1,fp8,fp8,0,0.3181546727816264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,float16,0,0.34006400903066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,128,0,1,float16,fp8,0,0.3452746470769246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,2,128,0,1,fp8,fp8,0,0.3184746702512105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,fp8,0,0.3451626698176066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,128,0,1,float16,float16,0,0.3471839825312297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,4,128,0,1,fp8,fp8,0,0.3192800084749858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,float16,0,0.3531680107116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,128,0,1,float16,fp8,0,0.3516106605529785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,64,8,128,0,1,fp8,fp8,0,0.3211733301480611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,128,0,1,fp8,fp8,0,4.350314776102702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,float16,0,5.033210754394531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,1,128,0,1,float16,fp8,0,5.041637420654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,float16,0,5.0344851811726885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,128,0,1,fp8,fp8,0,4.38044802347819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,2,128,0,1,float16,fp8,0,5.068085352579753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,float16,0,5.034970601399739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,128,0,1,fp8,fp8,0,4.394650777180989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,4,128,0,1,float16,fp8,0,5.091866811116536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,float16,0,5.109807968139648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,128,0,1,float16,fp8,0,5.1456906000773115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,float16,0,2.9193493525187173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,128,0,1,float16,fp8,0,2.9540160497029624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,64,8,128,0,1,fp8,fp8,0,4.437306722005208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,64,128,0,1,fp8,fp8,0,2.532693386077881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,float16,0,2.504458745320638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,128,0,1,fp8,fp8,0,2.1982666651407876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,1,128,0,1,float16,fp8,0,2.5111519495646157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,float16,0,2.493216037750244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,128,0,1,float16,fp8,0,2.52620267868042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,2,128,0,1,fp8,fp8,0,2.2041865984598794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,float16,0,2.5471787452697754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,128,0,1,float16,fp8,0,2.5291786193847656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,4,128,0,1,fp8,fp8,0,2.2129759788513184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,float16,0,2.5608906745910645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,128,0,1,float16,fp8,0,2.5753866831461587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,float16,0,1.4968372980753581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,64,8,128,0,1,fp8,fp8,0,2.2301440238952637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,128,0,1,float16,fp8,0,1.5293067296346028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,64,128,0,1,fp8,fp8,0,1.3267573515574138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,float16,0,1.2930933634440105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,128,0,1,float16,fp8,0,1.290560007095337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,1,128,0,1,fp8,fp8,0,1.1485546429951985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,float16,0,1.30402668317159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,128,0,1,float16,fp8,0,1.3029066721598308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,2,128,0,1,fp8,fp8,0,1.1458880106608074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,float16,0,1.3076426982879639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,128,0,1,float16,fp8,0,1.3139893213907878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,4,128,0,1,fp8,fp8,0,1.1523626645406086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,float16,0,1.3190346558888753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,128,0,1,float16,fp8,0,1.3292319774627686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,float16,0,0.7921760082244873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,64,8,128,0,1,fp8,fp8,0,1.16267196337382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,128,0,1,float16,fp8,0,0.8040533065795898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,64,128,0,1,fp8,fp8,0,0.7034613291422526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,float16,0,0.6887359619140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,128,0,1,float16,fp8,0,0.6950666904449463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,1,128,0,1,fp8,fp8,0,0.6157279809316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,float16,0,0.6945226987202963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,128,0,1,float16,fp8,0,0.6980960369110107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,2,128,0,1,fp8,fp8,0,0.6185226837793986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,float16,0,0.692570686340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,128,0,1,fp8,fp8,0,0.6196053425470988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,4,128,0,1,float16,fp8,0,0.7006613413492838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,float16,0,0.7057387034098307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,float16,0,0.4434719880421956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,128,0,1,float16,fp8,0,0.7030453681945801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,64,8,128,0,1,fp8,fp8,0,0.6259893178939819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,128,0,1,float16,fp8,0,0.4510879913965861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,float16,0,0.38467200597127277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,64,128,0,1,fp8,fp8,0,0.3981599807739258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,128,0,1,float16,fp8,0,0.389354666074117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,1,128,0,1,fp8,fp8,0,0.3546453317006429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,float16,0,0.38733331362406415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,128,0,1,float16,fp8,0,0.39449067910512287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,2,128,0,1,fp8,fp8,0,0.35438398520151776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,float16,0,0.39244266351064044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,128,0,1,fp8,fp8,0,0.35726932684580487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,4,128,0,1,float16,fp8,0,0.39337066809336346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,float16,0,0.3976159890492757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,128,0,1,float16,fp8,0,0.3999040126800537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,float16,0,0.266704003016154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,64,8,128,0,1,fp8,fp8,0,0.3603839874267578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,128,0,1,float16,fp8,0,0.27247466643651325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,64,128,0,1,fp8,fp8,0,0.24493332703908285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,float16,0,0.22958399852116904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,128,0,1,float16,fp8,0,0.23262399435043335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,1,128,0,1,fp8,fp8,0,0.21837866306304932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,float16,0,0.23144533236821493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,128,0,1,float16,fp8,0,0.23047999540964761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,2,128,0,1,fp8,fp8,0,0.21855467557907104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,float16,0,0.23370667298634848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,128,0,1,float16,fp8,0,0.2347093423207601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,4,128,0,1,fp8,fp8,0,0.2213653326034546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,float16,0,0.23727999130884805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,128,0,1,float16,fp8,0,0.23819732666015625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,64,8,128,0,1,fp8,fp8,0,0.22426132361094156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,128,0,1,fp8,fp8,0,4.89851729075114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,float16,0,5.513231913248698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,1,128,0,1,float16,fp8,0,5.585488001505534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,float16,0,5.600565592447917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,128,0,1,fp8,fp8,0,4.921258608500163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,2,128,0,1,float16,fp8,0,5.589199701944987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,float16,0,5.6061757405598955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,128,0,1,float16,fp8,0,5.615493138631185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,4,128,0,1,fp8,fp8,0,4.92574946085612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,float16,0,5.649706522623698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,128,0,1,fp8,fp8,0,5.009519894917806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,64,8,128,0,1,float16,fp8,0,5.698650360107422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,float16,0,3.2246665954589844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,128,0,1,fp8,fp8,0,2.866901397705078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,float16,0,2.751466751098633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,64,128,0,1,float16,fp8,0,3.2789173126220703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,128,0,1,float16,fp8,0,2.739247957865397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,1,128,0,1,fp8,fp8,0,2.4399894078572593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,float16,0,2.757007916768392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,128,0,1,float16,fp8,0,2.7540105183919272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,2,128,0,1,fp8,fp8,0,2.446725368499756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,float16,0,2.7733866373697915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,128,0,1,fp8,fp8,0,2.46725861231486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,4,128,0,1,float16,fp8,0,2.7786668141682944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,float16,0,2.81220277150472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,float16,0,1.6435680389404297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,128,0,1,fp8,fp8,0,2.4905759493509927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,64,8,128,0,1,float16,fp8,0,2.827141443888346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,128,0,1,float16,fp8,0,1.673210620880127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,64,128,0,1,fp8,fp8,0,1.4657333691914876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,float16,0,1.403557300567627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,128,0,1,fp8,fp8,0,1.2493333021799724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,1,128,0,1,float16,fp8,0,1.4039093653361003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,float16,0,1.3972105979919434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,128,0,1,fp8,fp8,0,1.2528746922810872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,2,128,0,1,float16,fp8,0,1.4155467351277669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,float16,0,1.4177227020263672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,128,0,1,float16,fp8,0,1.4193332990010579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,4,128,0,1,fp8,fp8,0,1.2610507011413574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,float16,0,1.4363306363423665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,128,0,1,float16,fp8,0,1.4449280103047688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,64,8,128,0,1,fp8,fp8,0,1.2744426727294922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,float16,0,0.8554826577504476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,128,0,1,fp8,fp8,0,0.7716853618621826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,64,128,0,1,float16,fp8,0,0.8759146531422933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,float16,0,0.7279253005981445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,128,0,1,float16,fp8,0,0.7341333230336508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,1,128,0,1,fp8,fp8,0,0.6588586568832397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,float16,0,0.7357439994812012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,128,0,1,float16,fp8,0,0.7365866502126058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,2,128,0,1,fp8,fp8,0,0.6608906586964926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,float16,0,0.7415573596954346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,128,0,1,float16,fp8,0,0.7476639747619629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,4,128,0,1,fp8,fp8,0,0.6660213470458984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,float16,0,0.7499306996663412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,128,0,1,float16,fp8,0,0.755237340927124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,64,8,128,0,1,fp8,fp8,0,0.673354705174764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,float16,0,0.4599093198776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,128,0,1,float16,fp8,0,0.4711199998855591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,64,128,0,1,fp8,fp8,0,0.41842134793599445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,float16,0,0.39816534519195557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,128,0,1,float16,fp8,0,0.3971253236134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,1,128,0,1,fp8,fp8,0,0.3625013430913289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,float16,0,0.3995893398920695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,128,0,1,float16,fp8,0,0.4013013442357381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,2,128,0,1,fp8,fp8,0,0.3634026845296224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,float16,0,0.40330668290456134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,128,0,1,fp8,fp8,0,0.3659093379974365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,4,128,0,1,float16,fp8,0,0.4044693311055501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,float16,0,0.40724265575408936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,128,0,1,float16,fp8,0,0.4111786683400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,float16,0,0.26447467009226483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,64,8,128,0,1,fp8,fp8,0,0.36813334623972577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,128,0,1,float16,fp8,0,0.26952532927195233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,64,128,0,1,fp8,fp8,0,0.24253867069880167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,float16,0,0.22427733739217123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,128,0,1,float16,fp8,0,0.22579733530680338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,1,128,0,1,fp8,fp8,0,0.2134773333867391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,float16,0,0.2249120076497396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,128,0,1,float16,fp8,0,0.22800532976786295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,2,128,0,1,fp8,fp8,0,0.21421867609024048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,float16,0,0.22669333219528198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,128,0,1,float16,fp8,0,0.23061867554982504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,4,128,0,1,fp8,fp8,0,0.21554134289423624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,float16,0,0.23411200443903604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,128,0,1,float16,fp8,0,0.23276267449061075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,64,8,128,0,1,fp8,fp8,0,0.21687465906143188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,float16,0,0.1681386629740397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,128,0,1,float16,fp8,0,0.17040000359217325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,64,128,0,1,fp8,fp8,0,0.1523360013961792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,float16,0,0.14216533303260803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,128,0,1,float16,fp8,0,0.14341333508491516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,1,128,0,1,fp8,fp8,0,0.13435199856758118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,float16,0,0.1421119968096415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,128,0,1,float16,fp8,0,0.1439626713593801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,2,128,0,1,fp8,fp8,0,0.13615467151006064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,float16,0,0.1421119968096415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,128,0,1,float16,fp8,0,0.14325333635012308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,4,128,0,1,fp8,fp8,0,0.1351573367913564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,float16,0,0.14406933387120566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,128,0,1,float16,fp8,0,0.14442666371663412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,64,8,128,0,1,fp8,fp8,0,0.1360586682955424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,128,0,1,fp8,fp8,0,3.335520108540853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,float16,0,3.719402631123861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,1,128,0,1,float16,fp8,0,3.7342774073282876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,float16,0,3.7226880391438804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,128,0,1,fp8,fp8,0,3.355957349141439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,2,128,0,1,float16,fp8,0,3.766511917114258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,fp8,0,3.7564427057902017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,128,0,1,float16,float16,0,3.760650634765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,4,128,0,1,fp8,fp8,0,3.3652801513671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,float16,0,3.830671946207682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,128,0,1,fp8,fp8,0,3.4080801010131836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,64,8,128,0,1,float16,fp8,0,3.8293066024780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,float16,0,2.2068427403767905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,128,0,1,fp8,fp8,0,1.9793599446614583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,64,128,0,1,float16,fp8,0,2.240656057993571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,float16,0,1.8554399808247883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,128,0,1,fp8,fp8,0,1.6652746200561523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,1,128,0,1,float16,fp8,0,1.8652586936950684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,float16,0,1.8632906277974446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,128,0,1,fp8,fp8,0,1.669962724049886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,2,128,0,1,float16,fp8,0,1.875871976216634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,float16,0,1.874773343404134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,128,0,1,fp8,fp8,0,1.6803040504455566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,4,128,0,1,float16,fp8,0,1.8890186945597331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,float16,0,1.1284639835357666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,float16,0,1.8981173833211262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,128,0,1,fp8,fp8,0,1.701855977376302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,64,8,128,0,1,float16,fp8,0,1.9167146682739258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,128,0,1,float16,fp8,0,1.150490681330363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,float16,0,0.9543840090433756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,64,128,0,1,fp8,fp8,0,1.0200479825337727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,128,0,1,float16,fp8,0,0.9552213350931803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,1,128,0,1,fp8,fp8,0,0.8590986728668213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,128,0,1,fp8,fp8,0,0.8593653043111166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,float16,0,0.960431973139445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,2,128,0,1,float16,fp8,0,0.9572266737620035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,float16,0,0.9665760199228922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,128,0,1,fp8,fp8,0,0.8672800064086914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,4,128,0,1,float16,fp8,0,0.9695893128712972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,float16,0,0.9746879736582438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,float16,0,0.5931946833928426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,128,0,1,float16,fp8,0,0.9854293664296468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,128,0,1,float16,fp8,0,0.6043519973754883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,64,8,128,0,1,fp8,fp8,0,0.8772586981455485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,64,128,0,1,fp8,fp8,0,0.5402826468149821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,float16,0,0.5039733250935873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,128,0,1,fp8,fp8,0,0.4569813410441081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,1,128,0,1,float16,fp8,0,0.5035680135091146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,float16,0,0.5053973197937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,128,0,1,fp8,fp8,0,0.45936532815297443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,2,128,0,1,float16,fp8,0,0.507861336072286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,float16,0,0.5069119930267334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,128,0,1,fp8,fp8,0,0.4614986578623454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,4,128,0,1,float16,fp8,0,0.5133013327916464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,float16,0,0.5149759848912557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,128,0,1,float16,fp8,0,0.51910400390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,float16,0,0.3226933280626933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,128,0,1,float16,fp8,0,0.3307680090268453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,64,8,128,0,1,fp8,fp8,0,0.4655146598815918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,64,128,0,1,fp8,fp8,0,0.2978559931119283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,float16,0,0.2747466762860616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,128,0,1,float16,fp8,0,0.27804799874623615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,1,128,0,1,fp8,fp8,0,0.25512532393137616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,float16,0,0.27633599440256756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,128,0,1,float16,fp8,0,0.2774933377901713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,2,128,0,1,fp8,fp8,0,0.2569920023282369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,float16,0,0.27909332513809204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,128,0,1,float16,fp8,0,0.28015466531117755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,4,128,0,1,fp8,fp8,0,0.25774399439493817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,float16,0,0.2842400074005127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,128,0,1,float16,fp8,0,0.2852800091107686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,64,8,128,0,1,fp8,fp8,0,0.25939200321833294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,float16,0,0.1914506753285726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,128,0,1,float16,fp8,0,0.1948266625404358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,64,128,0,1,fp8,fp8,0,0.17563732465108237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,float16,0,0.15653333067893982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,128,0,1,float16,fp8,0,0.15877866744995117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,1,128,0,1,fp8,fp8,0,0.1498026649157206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,float16,0,0.15892266233762106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,128,0,1,float16,fp8,0,0.15689599514007568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,2,128,0,1,fp8,fp8,0,0.15035733580589294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,float16,0,0.15847466389338175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,128,0,1,float16,fp8,0,0.1609119971593221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,4,128,0,1,fp8,fp8,0,0.15289599696795145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,float16,0,0.1630826691786448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,128,0,1,float16,fp8,0,0.1641546686490377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,float16,0,0.11963733037312825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,64,8,128,0,1,fp8,fp8,0,0.15677866339683533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,128,0,1,float16,fp8,0,0.12187199791272481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,float16,0,0.10514133175214131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,64,128,0,1,fp8,fp8,0,0.11362666885058086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,128,0,1,float16,fp8,0,0.10557333628336589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,1,128,0,1,fp8,fp8,0,0.10140267014503479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,float16,0,0.10425066947937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,128,0,1,float16,fp8,0,0.10545600454012553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,2,128,0,1,fp8,fp8,0,0.10098133484522502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,float16,0,0.10526933272679646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,128,0,1,float16,fp8,0,0.10525332887967427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,4,128,0,1,fp8,fp8,0,0.10108799735705058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,float16,0,0.10515200098355611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,128,0,1,float16,fp8,0,0.10572266578674316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,64,8,128,0,1,fp8,fp8,0,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,128,0,1,fp8,fp8,0,3.7630561192830405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,fp8,0,3.973461469014486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,1,128,0,1,float16,float16,0,4.052639961242676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,float16,0,4.0792694091796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,128,0,1,fp8,fp8,0,4.019290606180827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,2,128,0,1,float16,fp8,0,4.146442731221517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,float16,0,4.131834665934245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,128,0,1,float16,fp8,0,4.088063875834147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,4,128,0,1,fp8,fp8,0,4.05071481068929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,float16,0,4.255925178527832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,128,0,1,float16,fp8,0,4.22217591603597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,64,8,128,0,1,fp8,fp8,0,4.543792088826497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,float16,0,2.465760072072347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,128,0,1,float16,fp8,0,2.4307360649108887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,64,128,0,1,fp8,fp8,0,2.380832036336263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,float16,0,1.9872320493062336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,128,0,1,float16,fp8,0,1.9807467460632324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,1,128,0,1,fp8,fp8,0,1.8903892834981282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,float16,0,2.007706642150879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,128,0,1,float16,fp8,0,2.008608023325602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,2,128,0,1,fp8,fp8,0,1.971029281616211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,float16,0,2.0297013918558755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,128,0,1,float16,fp8,0,2.041386604309082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,4,128,0,1,fp8,fp8,0,1.970410664876302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,float16,0,2.102186679840088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,float16,0,1.2210079828898113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,128,0,1,float16,fp8,0,1.20687468846639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,128,0,1,float16,fp8,0,2.0874080657958984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,64,8,128,0,1,fp8,fp8,0,2.2572426795959473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,float16,0,1.0029066403706868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,64,128,0,1,fp8,fp8,0,1.1737919648488362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,128,0,1,float16,fp8,0,1.0054506460825603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,1,128,0,1,fp8,fp8,0,0.9584799607594808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,float16,0,1.0122666358947754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,128,0,1,float16,fp8,0,1.020037333170573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,2,128,0,1,fp8,fp8,0,0.9671146869659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,float16,0,1.0252052942911785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,128,0,1,float16,fp8,0,1.0233333110809326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,4,128,0,1,fp8,fp8,0,0.9823359648386637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,float16,0,1.0451520284016926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,128,0,1,float16,fp8,0,1.0473546981811523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,float16,0,0.6232959826787313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,128,0,1,float16,fp8,0,0.6102720101674398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,64,8,128,0,1,fp8,fp8,0,1.1117760340372722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,64,128,0,1,fp8,fp8,0,0.5970773299535116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,float16,0,0.5163946549097697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,128,0,1,float16,fp8,0,0.5153546730677286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,1,128,0,1,fp8,fp8,0,0.48852264881134033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,float16,0,0.5185546477635702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,128,0,1,fp8,fp8,0,0.49668268362681073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,2,128,0,1,float16,fp8,0,0.5204480091730753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,float16,0,0.5235840082168579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,128,0,1,float16,fp8,0,0.527023990948995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,4,128,0,1,fp8,fp8,0,0.49563201268513996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,float16,0,0.536138653755188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,128,0,1,float16,fp8,0,0.5338506698608398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,64,8,128,0,1,fp8,fp8,0,0.5336159865061442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,float16,0,0.3272479971249898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,128,0,1,float16,fp8,0,0.32073066631952923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,64,128,0,1,fp8,fp8,0,0.30980799595514935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,float16,0,0.27164800961812335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,128,0,1,float16,fp8,0,0.2696693340937297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,1,128,0,1,fp8,fp8,0,0.2523519992828369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,float16,0,0.2704213261604309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,128,0,1,float16,fp8,0,0.2720320026079814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,2,128,0,1,fp8,fp8,0,0.2578879992167155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,float16,0,0.27298132578531903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,128,0,1,float16,fp8,0,0.2730773289998372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,4,128,0,1,fp8,fp8,0,0.26100800434748334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,float16,0,0.2826613386472066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,128,0,1,fp8,fp8,0,0.2669866681098938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,64,8,128,0,1,float16,fp8,0,0.28041066726048786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,float16,0,0.17904533942540488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,128,0,1,float16,fp8,0,0.17547200123469034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,float16,0,0.1458346645037333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,64,128,0,1,fp8,fp8,0,0.1690773367881775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,128,0,1,float16,fp8,0,0.1458133359750112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,1,128,0,1,fp8,fp8,0,0.13699199755986533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,float16,0,0.1469013293584188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,128,0,1,float16,fp8,0,0.14636799693107605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,2,128,0,1,fp8,fp8,0,0.13823466499646506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,float16,0,0.14829867084821066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,128,0,1,float16,fp8,0,0.14877866705258688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,4,128,0,1,fp8,fp8,0,0.14031466841697693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,float16,0,0.15176533659299216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,128,0,1,float16,fp8,0,0.15084266662597656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,float16,0,0.10121599833170573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,64,8,128,0,1,fp8,fp8,0,0.143994669119517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,128,0,1,float16,fp8,0,0.0992693305015564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,64,128,0,1,fp8,fp8,0,0.09526933232943217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,float16,0,0.08170133332411449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,128,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,1,128,0,1,fp8,fp8,0,0.07579733431339264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,float16,0,0.08072533210118611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,128,0,1,float16,fp8,0,0.08212266862392426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,2,128,0,1,fp8,fp8,0,0.07656533519426982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,float16,0,0.0820000022649765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,128,0,1,fp8,fp8,0,0.0768746683994929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,4,128,0,1,float16,fp8,0,0.08089600006739299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,float16,0,0.08284799754619598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,128,0,1,float16,fp8,0,0.08363200227419536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,64,8,128,0,1,fp8,fp8,0,0.07980800171693166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,float16,0,0.05684266487757365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,128,0,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,64,128,0,1,fp8,fp8,0,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,float16,0,0.052101333936055504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,128,0,1,float16,fp8,0,0.05300800005594889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,1,128,0,1,fp8,fp8,0,0.049786667029062905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,128,0,1,float16,fp8,0,0.05236266553401947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,2,128,0,1,fp8,fp8,0,0.048623998959859215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,float16,0,0.05193600058555603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,128,0,1,float16,fp8,0,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,4,128,0,1,fp8,fp8,0,0.04849066833655039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,float16,0,0.052144000927607216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,128,0,1,float16,fp8,0,0.05373866856098175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,64,8,128,0,1,fp8,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,128,0,1,fp8,fp8,0,3.2895787556966147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,128,0,1,float16,fp8,0,3.4150826136271157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,1,128,0,1,float16,float16,0,3.4988746643066406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,128,0,1,float16,fp8,0,3.5071519215901694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,128,0,1,fp8,fp8,0,3.5781920750935874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,2,128,0,1,float16,float16,0,3.562901178995768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,128,0,1,float16,float16,0,3.5822668075561523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,128,0,1,fp8,fp8,0,3.6083733240763345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,4,128,0,1,float16,fp8,0,3.6253973642985025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,128,0,1,float16,float16,0,3.6727094650268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,128,0,1,float16,float16,0,2.171664079030355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,128,0,1,float16,fp8,0,2.1268693606058755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,128,0,1,float16,fp8,0,3.6642611821492515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,64,8,128,0,1,fp8,fp8,0,4.062394777933757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,128,0,1,float16,float16,0,1.7221333185831706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,64,128,0,1,fp8,fp8,0,2.1403306325276694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,128,0,1,float16,fp8,0,1.7164853413899739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,1,128,0,1,fp8,fp8,0,1.653999964396159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,128,0,1,float16,float16,0,1.7416747411092122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,128,0,1,float16,fp8,0,1.7531147003173828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,2,128,0,1,fp8,fp8,0,1.7280160586039226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,128,0,1,float16,float16,0,1.7548853556315105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,128,0,1,float16,fp8,0,1.7671680450439453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,4,128,0,1,fp8,fp8,0,1.786197344462077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,128,0,1,float16,float16,0,1.826464017232259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,128,0,1,float16,fp8,0,1.8052533467610676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,128,0,1,float16,float16,0,1.0599626700083415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,128,0,1,float16,fp8,0,1.0579840342203777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,64,8,128,0,1,fp8,fp8,0,2.0180373191833496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,128,0,1,float16,float16,0,0.8739306926727295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,64,128,0,1,fp8,fp8,0,1.0495519638061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,128,0,1,float16,fp8,0,0.8724053700764974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,1,128,0,1,fp8,fp8,0,0.8354667027791342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,128,0,1,float16,float16,0,0.8827520211537679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,128,0,1,float16,fp8,0,0.8875786463419596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,2,128,0,1,fp8,fp8,0,0.856117328008016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,128,0,1,float16,float16,0,0.8886720339457194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,128,0,1,fp8,fp8,0,0.8550933202107748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,4,128,0,1,float16,fp8,0,0.894431988398234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,128,0,1,float16,float16,0,0.9053226312001547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,128,0,1,float16,fp8,0,0.9041066964467367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,128,0,1,float16,float16,0,0.5425120194753011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,128,0,1,float16,fp8,0,0.531823992729187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,64,128,0,1,fp8,fp8,0,0.5340426762898763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,64,8,128,0,1,fp8,fp8,0,0.9999840259552002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,128,0,1,float16,float16,0,0.4465013345082601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,128,0,1,float16,fp8,0,0.4460746844609578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,1,128,0,1,fp8,fp8,0,0.4280159870783488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,128,0,1,float16,float16,0,0.451749324798584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,128,0,1,fp8,fp8,0,0.4349973201751709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,2,128,0,1,float16,fp8,0,0.4516479969024658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,128,0,1,float16,float16,0,0.4559946854909261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,128,0,1,float16,fp8,0,0.4559893210728963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,4,128,0,1,fp8,fp8,0,0.43792001406351727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,128,0,1,float16,float16,0,0.4644586642583211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,128,0,1,float16,fp8,0,0.4628693262736003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,128,0,1,float16,float16,0,0.2834293246269226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,128,0,1,float16,fp8,0,0.27667200565338135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,64,8,128,0,1,fp8,fp8,0,0.4805813233057658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,64,128,0,1,fp8,fp8,0,0.2781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,128,0,1,float16,float16,0,0.2346400022506714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,128,0,1,float16,fp8,0,0.2337119976679484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,1,128,0,1,fp8,fp8,0,0.2216213345527649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,128,0,1,float16,float16,0,0.2351199984550476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,128,0,1,float16,fp8,0,0.23568000396092734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,2,128,0,1,fp8,fp8,0,0.22747200727462769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,128,0,1,float16,float16,0,0.23729600509007773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,128,0,1,float16,fp8,0,0.2373653252919515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,4,128,0,1,fp8,fp8,0,0.23111466566721597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,128,0,1,float16,float16,0,0.24256000916163126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,128,0,1,float16,fp8,0,0.2418559988339742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,64,8,128,0,1,fp8,fp8,0,0.2360746661822001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,128,0,1,float16,float16,0,0.15594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,128,0,1,float16,fp8,0,0.15096533298492432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,64,128,0,1,fp8,fp8,0,0.15133866667747498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,128,0,1,float16,float16,0,0.12802666425704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,128,0,1,float16,fp8,0,0.12703999876976013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,1,128,0,1,fp8,fp8,0,0.11979732910792033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,128,0,1,float16,float16,0,0.12727466225624084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,128,0,1,float16,fp8,0,0.12796266873677573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,2,128,0,1,fp8,fp8,0,0.12156800429026286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,128,0,1,float16,float16,0,0.12803733348846436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,128,0,1,float16,fp8,0,0.12929067015647888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,4,128,0,1,fp8,fp8,0,0.122597336769104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,128,0,1,float16,float16,0,0.13200533390045166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,128,0,1,float16,fp8,0,0.13095466295878092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,64,8,128,0,1,fp8,fp8,0,0.12757866581281027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,128,0,1,float16,float16,0,0.08687999844551086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,128,0,1,float16,fp8,0,0.08542399605115254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,64,128,0,1,fp8,fp8,0,0.08487466971079509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,128,0,1,float16,float16,0,0.07010133564472198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,128,0,1,float16,fp8,0,0.07110933462778728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,1,128,0,1,fp8,fp8,0,0.0645653357108434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,128,0,1,float16,float16,0,0.07019733389218648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,128,0,1,float16,fp8,0,0.07045333087444305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,2,128,0,1,fp8,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,128,0,1,float16,float16,0,0.07042666773001353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,128,0,1,float16,fp8,0,0.07069333394368489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,4,128,0,1,fp8,fp8,0,0.06682666639486949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,128,0,1,float16,float16,0,0.07276799778143565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,128,0,1,fp8,fp8,0,0.06919466455777486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,64,8,128,0,1,float16,fp8,0,0.07074133555094402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,128,0,1,float16,float16,0,0.04931733508904775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,128,0,1,float16,fp8,0,0.048800001541773476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,64,128,0,1,fp8,fp8,0,0.051088000337282814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,128,0,1,float16,float16,0,0.045653333266576133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,128,0,1,float16,fp8,0,0.04548266530036926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,1,128,0,1,fp8,fp8,0,0.04350399971008301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,128,0,1,float16,float16,0,0.04554666578769684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,128,0,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,2,128,0,1,fp8,fp8,0,0.04230933388074239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,128,0,1,float16,float16,0,0.044437333941459656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,128,0,1,float16,fp8,0,0.04560533165931702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,4,128,0,1,fp8,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,128,0,1,float16,float16,0,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,128,0,1,float16,fp8,0,0.04637333254019419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,64,8,128,0,1,fp8,fp8,0,0.043951998154322304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,128,0,1,float16,float16,0,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,128,0,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,128,0,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,64,128,0,1,fp8,fp8,0,0.03164800008138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,128,0,1,float16,fp8,0,0.029946667452653248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,1,128,0,1,fp8,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,128,0,1,float16,float16,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,128,0,1,float16,fp8,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,2,128,0,1,fp8,fp8,0,0.0283146674434344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,128,0,1,float16,float16,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,128,0,1,float16,fp8,0,0.029322666426499683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,4,128,0,1,fp8,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,128,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,128,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,64,8,128,0,1,fp8,fp8,0,0.028853334486484528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,128,0,1,float16,float16,0,1.5832373301188152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,128,0,1,float16,fp8,0,1.5824960072835286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,1,128,0,1,fp8,fp8,0,1.5262986818949382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,128,0,1,float16,fp8,0,1.6047892570495605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,128,0,1,float16,float16,0,1.5995786984761555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,2,128,0,1,fp8,fp8,0,1.620682716369629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,128,0,1,float16,float16,0,1.615301291147868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,128,0,1,fp8,fp8,0,1.5842453638712566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,4,128,0,1,float16,fp8,0,1.6255253156026204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,128,0,1,float16,float16,0,1.6759413083394368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,128,0,1,float16,float16,0,0.9896586736043295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,128,0,1,float16,fp8,0,0.9716053009033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,128,0,1,float16,fp8,0,1.6568800608317058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,64,8,128,0,1,fp8,fp8,0,1.886090596516927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,64,128,0,1,fp8,fp8,0,0.985637346903483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,128,0,1,float16,float16,0,0.804154634475708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,128,0,1,float16,fp8,0,0.8044319947560629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,1,128,0,1,fp8,fp8,0,0.7694559892018636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,128,0,1,float16,float16,0,0.8069279988606771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,128,0,1,float16,fp8,0,0.8069866498311361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,2,128,0,1,fp8,fp8,0,0.7832106749216715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,128,0,1,float16,float16,0,0.8174986839294434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,128,0,1,float16,fp8,0,0.8141173521677653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,4,128,0,1,fp8,fp8,0,0.7947093645731608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,128,0,1,float16,float16,0,0.831712007522583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,128,0,1,float16,fp8,0,0.8292106787363688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,128,0,1,float16,float16,0,0.504800001780192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,128,0,1,float16,fp8,0,0.49899200598398846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,64,8,128,0,1,fp8,fp8,0,0.928816000620524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,64,128,0,1,fp8,fp8,0,0.5019893248875936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,128,0,1,float16,float16,0,0.4131093422571818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,128,0,1,float16,fp8,0,0.41233599185943604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,1,128,0,1,fp8,fp8,0,0.39474666118621826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,128,0,1,float16,float16,0,0.4161653518676758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,128,0,1,float16,fp8,0,0.413424015045166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,2,128,0,1,fp8,fp8,0,0.4028000036875407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,128,0,1,float16,float16,0,0.418015996615092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,128,0,1,float16,fp8,0,0.4172266721725464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,4,128,0,1,fp8,fp8,0,0.403439998626709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,128,0,1,float16,float16,0,0.42661865552266437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,128,0,1,float16,fp8,0,0.42447467645009357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,128,0,1,float16,float16,0,0.2656853397687276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,64,8,128,0,1,fp8,fp8,0,0.45077331860860187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,128,0,1,float16,fp8,0,0.2606933315594991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,128,0,1,float16,float16,0,0.2164213260014852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,64,128,0,1,fp8,fp8,0,0.2614826758702596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,128,0,1,float16,fp8,0,0.21660266319910684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,1,128,0,1,fp8,fp8,0,0.20458134015401205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,128,0,1,float16,float16,0,0.21745065848032633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,128,0,1,float16,fp8,0,0.2185386617978414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,2,128,0,1,fp8,fp8,0,0.2100480000178019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,128,0,1,float16,float16,0,0.21839465697606406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,128,0,1,float16,fp8,0,0.21844265858332315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,4,128,0,1,fp8,fp8,0,0.21395200490951538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,128,0,1,float16,float16,0,0.22521599133809408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,128,0,1,float16,fp8,0,0.22402133544286093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,128,0,1,float16,float16,0,0.14550933241844177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,64,8,128,0,1,fp8,fp8,0,0.21801066398620605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,128,0,1,float16,fp8,0,0.1404800017674764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,64,128,0,1,fp8,fp8,0,0.14300266901652017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,128,0,1,float16,fp8,0,0.11547733346621196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,128,0,1,float16,float16,0,0.1150986651579539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,1,128,0,1,fp8,fp8,0,0.10991467038790385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,128,0,1,float16,float16,0,0.116047998269399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,128,0,1,float16,fp8,0,0.11600533127784729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,2,128,0,1,fp8,fp8,0,0.11014933387438457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,128,0,1,float16,float16,0,0.1176639993985494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,128,0,1,float16,fp8,0,0.1167680025100708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,4,128,0,1,fp8,fp8,0,0.11175466577212016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,128,0,1,float16,float16,0,0.12074666221936543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,128,0,1,float16,fp8,0,0.11975466211636861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,64,8,128,0,1,fp8,fp8,0,0.11613866686820984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,128,0,1,float16,float16,0,0.08273600041866302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,128,0,1,float16,fp8,0,0.08123200138409932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,64,128,0,1,fp8,fp8,0,0.07926400005817413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,128,0,1,float16,float16,0,0.06655466556549072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,128,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,1,128,0,1,fp8,fp8,0,0.06177066763242086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,128,0,1,float16,float16,0,0.06635733445485432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,128,0,1,float16,fp8,0,0.06643199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,2,128,0,1,fp8,fp8,0,0.061706667145093284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,128,0,1,float16,float16,0,0.06529599924882253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,128,0,1,float16,fp8,0,0.06644266843795776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,4,128,0,1,fp8,fp8,0,0.06178666651248932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,128,0,1,float16,float16,0,0.06666666766007741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,128,0,1,float16,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,128,0,1,float16,float16,0,0.04468800127506256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,64,8,128,0,1,fp8,fp8,0,0.06422399977842967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,64,128,0,1,fp8,fp8,0,0.046906664967536926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,128,0,1,float16,float16,0,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,128,0,1,float16,fp8,0,0.04153066625197729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,128,0,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,1,128,0,1,fp8,fp8,0,0.038848000268141426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,128,0,1,float16,fp8,0,0.04196266829967499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,2,128,0,1,fp8,fp8,0,0.0391839991013209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,128,0,1,float16,float16,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,128,0,1,float16,fp8,0,0.040063999593257904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,4,128,0,1,fp8,fp8,0,0.03921599934498469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,128,0,1,float16,float16,0,0.0414986660083135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,128,0,1,float16,fp8,0,0.04077333211898804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,128,0,1,float16,float16,0,0.02794666588306427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,64,8,128,0,1,fp8,fp8,0,0.03854399919509888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,128,0,1,float16,fp8,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,64,128,0,1,fp8,fp8,0,0.02828799933195114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,128,0,1,float16,float16,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,128,0,1,float16,fp8,0,0.027295999228954315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,1,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,128,0,1,float16,float16,0,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,128,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,2,128,0,1,fp8,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,128,0,1,float16,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,4,128,0,1,fp8,fp8,0,0.02644266684850057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,128,0,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,64,8,128,0,1,fp8,fp8,0,0.02773866554101308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,128,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,64,128,0,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,128,0,1,float16,fp8,0,0.021573332448800404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,1,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,128,0,1,float16,float16,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,128,0,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,2,128,0,1,fp8,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,128,0,1,float16,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,4,128,0,1,fp8,fp8,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,128,0,1,float16,float16,0,0.022106667359670002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,64,8,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,128,0,1,float16,float16,0,0.8712053298950195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,128,0,1,float16,fp8,0,0.8659839630126953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,1,128,0,1,fp8,fp8,0,0.8366026878356934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,128,0,1,float16,float16,0,0.8836426734924316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,128,0,1,fp8,fp8,0,0.854144016901652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,2,128,0,1,float16,fp8,0,0.8833866914113363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,128,0,1,float16,float16,0,0.8897120157877604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,128,0,1,float16,fp8,0,0.8845600287119547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,4,128,0,1,fp8,fp8,0,0.8704480330149332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,128,0,1,float16,float16,0,0.9069493611653646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,128,0,1,float16,fp8,0,0.9008586406707764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,128,0,1,float16,float16,0,0.5274186531702677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,128,0,1,float16,fp8,0,0.5176479816436768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,64,8,128,0,1,fp8,fp8,0,0.9918293158213297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,128,0,1,float16,float16,0,0.4435306787490845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,64,128,0,1,fp8,fp8,0,0.532421350479126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,128,0,1,float16,fp8,0,0.44228800137837726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,1,128,0,1,fp8,fp8,0,0.4256693522135417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,128,0,1,float16,float16,0,0.4455039898554484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,128,0,1,fp8,fp8,0,0.4377280076344808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,2,128,0,1,float16,fp8,0,0.4450026750564575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,128,0,1,float16,float16,0,0.45252267519632977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,128,0,1,float16,fp8,0,0.44899733861287433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,4,128,0,1,fp8,fp8,0,0.4407680034637451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,128,0,1,float16,float16,0,0.4617813428243001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,128,0,1,float16,fp8,0,0.4607413212458293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,128,0,1,float16,float16,0,0.2730986674626668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,128,0,1,float16,fp8,0,0.2667413353919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,64,8,128,0,1,fp8,fp8,0,0.4793813228607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,64,128,0,1,fp8,fp8,0,0.2744053403536479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,128,0,1,float16,float16,0,0.2302666703859965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,128,0,1,float16,fp8,0,0.22892266511917114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,1,128,0,1,fp8,fp8,0,0.2200053334236145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,128,0,1,float16,float16,0,0.23088000218073526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,128,0,1,float16,fp8,0,0.23187732696533203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,2,128,0,1,fp8,fp8,0,0.22497600317001343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,128,0,1,float16,float16,0,0.23425066471099854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,128,0,1,float16,fp8,0,0.2341973384221395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,4,128,0,1,fp8,fp8,0,0.2262399991353353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,128,0,1,float16,float16,0,0.2388533353805542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,128,0,1,float16,fp8,0,0.23932266235351562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,64,8,128,0,1,fp8,fp8,0,0.23081600666046143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,128,0,1,float16,float16,0,0.14808533589045206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,128,0,1,float16,fp8,0,0.14270933469136557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,64,128,0,1,fp8,fp8,0,0.14709867040316263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,128,0,1,float16,float16,0,0.1242133378982544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,128,0,1,float16,fp8,0,0.12337066729863484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,1,128,0,1,fp8,fp8,0,0.11691733201344807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,128,0,1,float16,float16,0,0.1244586706161499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,128,0,1,float16,fp8,0,0.12415466705958049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,2,128,0,1,fp8,fp8,0,0.11949867010116577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,128,0,1,float16,float16,0,0.12611732880274454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,128,0,1,float16,fp8,0,0.12619200348854065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,4,128,0,1,fp8,fp8,0,0.12065600355466206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,128,0,1,float16,float16,0,0.12734933694203696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,128,0,1,float16,fp8,0,0.1285866697629293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,128,0,1,float16,float16,0,0.08104533453782399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,64,8,128,0,1,fp8,fp8,0,0.12417599558830261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,128,0,1,float16,fp8,0,0.0803306649128596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,128,0,1,float16,float16,0,0.06795733173688252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,64,128,0,1,fp8,fp8,0,0.08273066580295563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,128,0,1,float16,fp8,0,0.06634133557478587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,1,128,0,1,fp8,fp8,0,0.06260799864927928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,128,0,1,float16,float16,0,0.0681386689345042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,128,0,1,float16,fp8,0,0.06651733318964641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,2,128,0,1,fp8,fp8,0,0.062352001667022705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,128,0,1,float16,fp8,0,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,128,0,1,float16,float16,0,0.06801066795984904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,4,128,0,1,fp8,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,128,0,1,float16,float16,0,0.06964799761772156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,128,0,1,float16,fp8,0,0.06844266752401988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,64,8,128,0,1,fp8,fp8,0,0.0664213349421819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,128,0,1,float16,float16,0,0.04642133414745331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,128,0,1,float16,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,64,128,0,1,fp8,fp8,0,0.04795200129350027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,128,0,1,float16,float16,0,0.04159999887148539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,128,0,1,float16,fp8,0,0.04289066791534424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,1,128,0,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,128,0,1,float16,float16,0,0.042117332418759666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,128,0,1,float16,fp8,0,0.04381333291530609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,2,128,0,1,fp8,fp8,0,0.03966933240493139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,128,0,1,float16,float16,0,0.04215999941031138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,128,0,1,float16,fp8,0,0.04387733340263367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,4,128,0,1,fp8,fp8,0,0.04005333284536997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,128,0,1,float16,float16,0,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,128,0,1,float16,fp8,0,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,64,8,128,0,1,fp8,fp8,0,0.043391997615496315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,128,0,1,float16,fp8,0,0.030591999491055805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,128,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,64,128,0,1,fp8,fp8,0,0.02961066613594691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,128,0,1,float16,fp8,0,0.027482666075229645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,1,128,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,128,0,1,float16,float16,0,0.02934933453798294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,128,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,2,128,0,1,fp8,fp8,0,0.02716800073782603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,128,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,128,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,4,128,0,1,fp8,fp8,0,0.027322667340437572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,128,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,128,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,64,8,128,0,1,fp8,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,128,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,128,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,64,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,128,0,1,float16,fp8,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,1,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,2,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,128,0,1,float16,float16,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,4,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,128,0,1,float16,float16,0,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,64,8,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,64,128,0,1,fp8,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,128,0,1,float16,float16,0,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,1,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,128,0,1,float16,float16,0,0.017658667018016178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,128,0,1,float16,float16,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,2,128,0,1,float16,fp8,0,0.0180479995906353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,4,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,128,0,1,float16,fp8,0,0.01833600054184596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,64,8,128,0,1,fp8,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,128,0,1,float16,float16,0,0.569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,128,0,1,fp8,fp8,0,0.5448053280512491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,1,128,0,1,float16,fp8,0,0.5656799872716268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,128,0,1,float16,float16,0,0.5745973189671835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,128,0,1,fp8,fp8,0,0.5532053311665853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,2,128,0,1,float16,fp8,0,0.572218656539917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,128,0,1,fp8,fp8,0,0.5531786680221558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,128,0,1,float16,fp8,0,0.5730986595153809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,4,128,0,1,float16,float16,0,0.5739680131276449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,128,0,1,fp8,fp8,0,0.5756160020828247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,128,0,1,float16,float16,0,0.5869226853052775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,64,8,128,0,1,float16,fp8,0,0.5811413526535034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,128,0,1,float16,fp8,0,0.32844799757003784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,128,0,1,float16,float16,0,0.3344693183898926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,64,128,0,1,fp8,fp8,0,0.3314293424288432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,128,0,1,float16,float16,0,0.29228800535202026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,128,0,1,float16,fp8,0,0.2912213404973348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,1,128,0,1,fp8,fp8,0,0.27822933594385785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,128,0,1,float16,float16,0,0.29470932483673096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,128,0,1,float16,fp8,0,0.29393066962560016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,2,128,0,1,fp8,fp8,0,0.2821279962857564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,128,0,1,float16,float16,0,0.29360532760620117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,128,0,1,float16,fp8,0,0.2956533432006836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,4,128,0,1,fp8,fp8,0,0.279968003431956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,128,0,1,float16,fp8,0,0.2990826765696208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,128,0,1,float16,float16,0,0.3001706600189209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,64,8,128,0,1,fp8,fp8,0,0.28619199991226196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,128,0,1,float16,float16,0,0.1773759921391805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,128,0,1,float16,fp8,0,0.17312532663345337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,64,128,0,1,fp8,fp8,0,0.17428267002105713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,128,0,1,float16,float16,0,0.1537546714146932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,128,0,1,float16,fp8,0,0.15479466319084167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,1,128,0,1,fp8,fp8,0,0.1469386617342631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,128,0,1,float16,float16,0,0.1553920010725657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,128,0,1,float16,fp8,0,0.15337066849072775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,2,128,0,1,fp8,fp8,0,0.14731199542681375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,128,0,1,float16,float16,0,0.15622400244077048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,128,0,1,fp8,fp8,0,0.1483573317527771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,128,0,1,float16,float16,0,0.15833066900571188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,4,128,0,1,float16,fp8,0,0.1546026666959127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,128,0,1,float16,fp8,0,0.1573919951915741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,128,0,1,float16,float16,0,0.09537067015965779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,64,8,128,0,1,fp8,fp8,0,0.15266666809717813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,128,0,1,float16,fp8,0,0.09488532940546672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,64,128,0,1,fp8,fp8,0,0.09598400195439656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,128,0,1,float16,float16,0,0.08335466186205547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,128,0,1,float16,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,1,128,0,1,fp8,fp8,0,0.07772799829641978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,128,0,1,float16,float16,0,0.08496000369389851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,128,0,1,float16,fp8,0,0.08477866649627686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,2,128,0,1,fp8,fp8,0,0.07715199887752533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,128,0,1,float16,fp8,0,0.08504000306129456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,128,0,1,float16,float16,0,0.08518933256467183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,4,128,0,1,fp8,fp8,0,0.07860800127188365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,128,0,1,float16,float16,0,0.08593066533406575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,128,0,1,float16,fp8,0,0.08614400029182434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,64,8,128,0,1,fp8,fp8,0,0.08078933258851369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,128,0,1,float16,float16,0,0.05433600147565206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,128,0,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,64,128,0,1,fp8,fp8,0,0.05551466842492422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,128,0,1,float16,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,128,0,1,float16,float16,0,0.04960533479849497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,1,128,0,1,fp8,fp8,0,0.04631466666857401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,128,0,1,float16,float16,0,0.049695998430252075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,128,0,1,float16,fp8,0,0.04980800052483877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,128,0,1,float16,float16,0,0.04980266590913137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,2,128,0,1,fp8,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,128,0,1,float16,fp8,0,0.04985066751639048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,4,128,0,1,fp8,fp8,0,0.045893331368764244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,128,0,1,float16,float16,0,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,128,0,1,float16,fp8,0,0.0499893327554067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,64,8,128,0,1,fp8,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,128,0,1,float16,float16,0,0.03393599887688955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,128,0,1,float16,fp8,0,0.03533333291610082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,64,128,0,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,128,0,1,float16,float16,0,0.03306133300065994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,128,0,1,float16,fp8,0,0.03249600032965342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,1,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,128,0,1,float16,float16,0,0.03328000009059906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,128,0,1,float16,fp8,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,2,128,0,1,fp8,fp8,0,0.03181333343187968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,128,0,1,float16,float16,0,0.032618666688601174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,128,0,1,float16,fp8,0,0.033610666791598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,4,128,0,1,fp8,fp8,0,0.0316746657093366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,128,0,1,float16,float16,0,0.03342933456103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,128,0,1,float16,fp8,0,0.03336533407370249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,64,8,128,0,1,fp8,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,128,0,1,float16,float16,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,128,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,64,128,0,1,fp8,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,128,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,128,0,1,float16,float16,0,0.02184533327817917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,128,0,1,float16,float16,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,1,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,128,0,1,float16,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,2,128,0,1,fp8,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,128,0,1,float16,fp8,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,4,128,0,1,fp8,fp8,0,0.02164799968401591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,128,0,1,float16,float16,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,128,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,64,8,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,128,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,64,128,0,1,fp8,fp8,0,0.01870399961868922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,2,128,0,1,fp8,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,4,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,64,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,128,0,1,float16,float16,0,0.016282666474580765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,64,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,1,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,2,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,4,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,64,8,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,128,0,1,float16,float16,0,0.4193333387374878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,128,0,1,float16,fp8,0,0.41847999890645343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,1,128,0,1,fp8,fp8,0,0.398357351620992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,128,0,1,float16,float16,0,0.4219893217086792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,128,0,1,float16,fp8,0,0.41997333367665607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,2,128,0,1,fp8,fp8,0,0.40190398693084717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,128,0,1,float16,float16,0,0.4195733467737834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,128,0,1,float16,fp8,0,0.4203146696090698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,4,128,0,1,fp8,fp8,0,0.40371731917063397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,128,0,1,fp8,fp8,0,0.4092746575673421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,128,0,1,float16,fp8,0,0.42396799723307294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,64,8,128,0,1,float16,float16,0,0.4235786596934001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,128,0,1,float16,float16,0,0.2387359937032064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,128,0,1,float16,fp8,0,0.23534933725992838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,64,128,0,1,fp8,fp8,0,0.23562665780385336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,128,0,1,float16,float16,0,0.2184000015258789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,128,0,1,float16,fp8,0,0.21751999855041504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,1,128,0,1,fp8,fp8,0,0.20641599098841348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,128,0,1,float16,fp8,0,0.21769599119822183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,128,0,1,float16,float16,0,0.21798400084177652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,2,128,0,1,fp8,fp8,0,0.2079413334528605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,128,0,1,float16,float16,0,0.21753599246342978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,128,0,1,float16,fp8,0,0.21773332357406616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,4,128,0,1,fp8,fp8,0,0.20845866203308105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,128,0,1,float16,float16,0,0.2192479968070984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,128,0,1,float16,fp8,0,0.22053333123524985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,64,8,128,0,1,fp8,fp8,0,0.2118133306503296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,128,0,1,float16,fp8,0,0.12487467130025227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,128,0,1,fp8,fp8,0,0.12691199779510498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,128,0,1,float16,float16,0,0.11556800206502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,64,128,0,1,float16,float16,0,0.12622933586438498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,128,0,1,float16,fp8,0,0.11556800206502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,1,128,0,1,fp8,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,128,0,1,float16,float16,0,0.11530133088429768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,128,0,1,float16,fp8,0,0.11566399534543355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,2,128,0,1,fp8,fp8,0,0.10760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,128,0,1,float16,float16,0,0.11543466647466023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,128,0,1,float16,fp8,0,0.11538666486740112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,4,128,0,1,fp8,fp8,0,0.10897066195805867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,128,0,1,float16,float16,0,0.11777599652608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,128,0,1,float16,fp8,0,0.1167039970556895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,64,8,128,0,1,fp8,fp8,0,0.11125333110491435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,128,0,1,float16,float16,0,0.07039999961853027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,128,0,1,float16,fp8,0,0.07039999961853027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,64,128,0,1,fp8,fp8,0,0.06966400146484375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,128,0,1,float16,float16,0,0.06674133241176605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,128,0,1,float16,fp8,0,0.06622399886449178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,1,128,0,1,fp8,fp8,0,0.06227200229962667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,128,0,1,float16,float16,0,0.06615466872851054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,128,0,1,float16,fp8,0,0.06653333206971486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,2,128,0,1,fp8,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,128,0,1,float16,float16,0,0.06634666522343953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,128,0,1,float16,fp8,0,0.06645866731802623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,4,128,0,1,fp8,fp8,0,0.06233066817124685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,128,0,1,float16,float16,0,0.06632000207901001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,128,0,1,float16,fp8,0,0.06637333333492279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,64,8,128,0,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,128,0,1,float16,float16,0,0.04192000130812327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,128,0,1,float16,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,64,128,0,1,fp8,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,128,0,1,float16,float16,0,0.039461334546407066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,128,0,1,float16,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,1,128,0,1,fp8,fp8,0,0.038047999143600464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,128,0,1,float16,fp8,0,0.03976533313592275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,128,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,2,128,0,1,fp8,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,128,0,1,float16,float16,0,0.039733332892258964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,128,0,1,float16,fp8,0,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,4,128,0,1,fp8,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,128,0,1,float16,float16,0,0.03958933303753535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,128,0,1,float16,fp8,0,0.04018666595220566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,64,8,128,0,1,fp8,fp8,0,0.039701332648595176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,128,0,1,float16,float16,0,0.027093333502610523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,128,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,64,128,0,1,fp8,fp8,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,128,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,128,0,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,1,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,128,0,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,2,128,0,1,fp8,fp8,0,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,128,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,4,128,0,1,fp8,fp8,0,0.026186667382717133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,128,0,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,128,0,1,float16,fp8,0,0.027237333357334137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,64,8,128,0,1,fp8,fp8,0,0.027215999861558277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,64,128,0,1,fp8,fp8,0,0.018965333700180054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,128,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,1,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,128,0,1,float16,float16,0,0.019930666933457058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,128,0,1,float16,fp8,0,0.02120000123977661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,2,128,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,128,0,1,float16,fp8,0,0.02092266579469045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,4,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,128,0,1,float16,float16,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,128,0,1,float16,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,64,8,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,64,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,1,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,128,0,1,float16,float16,0,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,2,128,0,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,128,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,128,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,4,128,0,1,fp8,fp8,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,64,8,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,128,0,1,float16,float16,0,0.01569066693385442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,128,0,1,float16,float16,0,0.01591466615597407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,64,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,1,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,2,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,128,0,1,float16,float16,0,0.01674666628241539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,4,128,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,64,8,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,128,0,1,float16,fp8,0,0.34537601470947266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,128,0,1,float16,float16,0,0.3455893198649089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,1,128,0,1,fp8,fp8,0,0.3301653265953064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,128,0,1,float16,fp8,0,0.34517331918080646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,128,0,1,float16,float16,0,0.3470986684163411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,2,128,0,1,fp8,fp8,0,0.33084267377853394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,128,0,1,float16,float16,0,0.34627199172973633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,128,0,1,float16,fp8,0,0.3466399908065796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,4,128,0,1,fp8,fp8,0,0.3321066697438558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,128,0,1,float16,float16,0,0.34754665692647296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,128,0,1,fp8,fp8,0,0.3354026476542155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,64,8,128,0,1,float16,fp8,0,0.3472906748453776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,128,0,1,float16,float16,0,0.19106666247049967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,128,0,1,float16,fp8,0,0.18926399946212769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,64,128,0,1,fp8,fp8,0,0.18802666664123535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,128,0,1,float16,float16,0,0.18158932526906332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,128,0,1,fp8,fp8,0,0.1685439944267273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,1,128,0,1,float16,fp8,0,0.17965867122014365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,128,0,1,float16,float16,0,0.1800266702969869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,128,0,1,float16,fp8,0,0.18149334192276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,2,128,0,1,fp8,fp8,0,0.16818133989969888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,128,0,1,float16,float16,0,0.18105065822601318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,128,0,1,float16,fp8,0,0.17968533436457315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,4,128,0,1,fp8,fp8,0,0.16890132427215576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,128,0,1,float16,float16,0,0.18111467361450195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,128,0,1,float16,fp8,0,0.1811413367589315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,64,8,128,0,1,fp8,fp8,0,0.17483200629552206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,128,0,1,float16,float16,0,0.10120532910029094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,128,0,1,float16,fp8,0,0.10145599643389384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,64,128,0,1,fp8,fp8,0,0.10250133275985718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,128,0,1,float16,float16,0,0.09745066364606221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,128,0,1,float16,fp8,0,0.09849599997202556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,1,128,0,1,fp8,fp8,0,0.09296000003814697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,128,0,1,float16,float16,0,0.09716799855232239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,128,0,1,float16,fp8,0,0.09725333253542583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,2,128,0,1,fp8,fp8,0,0.09307199716567993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,128,0,1,float16,float16,0,0.09717866778373718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,128,0,1,float16,fp8,0,0.09738666812578838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,4,128,0,1,fp8,fp8,0,0.09319999814033508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,128,0,1,float16,fp8,0,0.09731200337409973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,128,0,1,float16,float16,0,0.0983786682287852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,64,8,128,0,1,fp8,fp8,0,0.09327466289202373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,128,0,1,float16,float16,0,0.05699733396371206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,128,0,1,float16,fp8,0,0.05852800110975901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,64,128,0,1,fp8,fp8,0,0.05584533512592316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,128,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,128,0,1,float16,fp8,0,0.056362668673197426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,1,128,0,1,fp8,fp8,0,0.05380799869696299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,128,0,1,float16,fp8,0,0.056405335664749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,128,0,1,float16,float16,0,0.05638933181762695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,2,128,0,1,fp8,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,128,0,1,float16,float16,0,0.05641599992911021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,128,0,1,float16,fp8,0,0.056218668818473816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,4,128,0,1,fp8,fp8,0,0.053743998209635414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,128,0,1,float16,float16,0,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,128,0,1,float16,fp8,0,0.05633600056171417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,64,8,128,0,1,fp8,fp8,0,0.05416533350944519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,128,0,1,float16,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,128,0,1,float16,float16,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,64,128,0,1,fp8,fp8,0,0.035375999907652535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,128,0,1,float16,float16,0,0.03359466542800268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,128,0,1,float16,fp8,0,0.035429333647092186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,1,128,0,1,fp8,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,128,0,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,128,0,1,float16,fp8,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,2,128,0,1,fp8,fp8,0,0.03397866586844126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,128,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,128,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,4,128,0,1,fp8,fp8,0,0.03356799980004629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,128,0,1,float16,float16,0,0.033759998778502144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,128,0,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,64,8,128,0,1,float16,fp8,0,0.035360001027584076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,128,0,1,float16,float16,0,0.025381334125995636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,64,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,128,0,1,float16,float16,0,0.024122667809327442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,128,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,1,128,0,1,fp8,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,128,0,1,float16,fp8,0,0.025077333052953083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,128,0,1,float16,float16,0,0.024720000723997753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,2,128,0,1,fp8,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,128,0,1,float16,float16,0,0.024112001061439514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,128,0,1,float16,fp8,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,4,128,0,1,fp8,fp8,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,128,0,1,float16,float16,0,0.024192000428835552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,64,8,128,0,1,fp8,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,64,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,1,128,0,1,fp8,fp8,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,128,0,1,float16,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,2,128,0,1,fp8,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,128,0,1,float16,float16,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,128,0,1,float16,fp8,0,0.01940800001223882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,4,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,128,0,1,float16,float16,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,128,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,64,8,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,128,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,64,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,128,0,1,float16,float16,0,0.015861333658297855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,128,0,1,float16,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,128,0,1,float16,float16,0,0.015893333901961643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,4,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,64,8,128,0,1,fp8,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,128,0,1,float16,float16,0,0.016224000602960587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,64,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,128,0,1,float16,float16,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,1,128,0,1,fp8,fp8,0,0.01670933390657107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,128,0,1,float16,fp8,0,0.016890666137139004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,2,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,128,0,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,128,0,1,float16,float16,0,0.01684800038735072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,64,8,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,0,0.2900480031967163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,128,0,1,fp8,fp8,0,0.2651306589444478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,0,0.2898026704788208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,0,0.28965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,0,0.2897546688715617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,2,128,0,1,fp8,fp8,0,0.2651466727256775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,0,0.2901493310928345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,128,0,1,fp8,fp8,0,0.26528533299763996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,0,0.289792001247406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,0,0.2897973259290059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,0,0.1520799994468689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,128,0,1,fp8,fp8,0,0.26531734069188434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,0,0.2898240089416504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,0,0.15265066425005594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,64,128,0,1,fp8,fp8,0,0.14018666744232178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,0,0.15051199992497763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,0,0.15244799852371216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,1,128,0,1,fp8,fp8,0,0.13833066821098328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,0,0.15251200397809347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,0,0.1521013379096985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,2,128,0,1,fp8,fp8,0,0.13834666212399802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,0,0.15254933635393778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,0,0.15064000089963278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,4,128,0,1,fp8,fp8,0,0.1399679978688558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,0,0.15228266517321268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,0,0.1504586637020111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,0,0.08298666775226593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,64,8,128,0,1,fp8,fp8,0,0.1402400036652883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,0,0.08264000217119853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,64,128,0,1,fp8,fp8,0,0.0768746683994929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,0,0.08266133566697438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,0,0.08276266853014629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,1,128,0,1,fp8,fp8,0,0.07655466596285503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,0,0.08292266726493835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,0,0.08287466565767924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,2,128,0,1,fp8,fp8,0,0.07700799902280171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,0,0.08298133313655853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,0,0.0844693382581075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,4,128,0,1,fp8,fp8,0,0.07705066601435344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,0,0.08237866560618083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,0,0.08267199993133545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,64,8,128,0,1,fp8,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,0,0.049770668148994446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,64,128,0,1,fp8,fp8,0,0.04589866598447164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,0,0.0497920016447703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,0,0.04764266808827718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,1,128,0,1,fp8,fp8,0,0.046053335070610046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,0,0.047744000951449074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,2,128,0,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,0,0.04996799925963084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,0,0.04869333406289419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,4,128,0,1,fp8,fp8,0,0.0462719996770223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,0,0.0481279989083608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,0,0.049626668294270836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,64,8,128,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,0,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,64,128,0,1,fp8,fp8,0,0.03048533449570338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,0,0.031386665999889374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,1,128,0,1,fp8,fp8,0,0.030613332986831665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,2,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,0,0.03225066761175791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,0,0.031285333136717476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,4,128,0,1,fp8,fp8,0,0.029919999341169994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,0,0.03133333226044973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,0,0.031541332602500916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,64,8,128,0,1,fp8,fp8,0,0.030250666042168934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,64,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,128,0,1,fp8,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,0,0.02497600018978119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,2,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,0,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,0,0.02606400102376938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,0,0.023258666197458904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,64,8,128,0,1,fp8,fp8,0,0.023024000227451324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,64,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,2,128,0,1,fp8,fp8,0,0.018522666146357853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,128,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,64,8,128,0,1,fp8,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,0,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,64,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,1,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,2,128,0,1,fp8,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,4,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,0,0.016255999604860943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,64,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,1,128,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,2,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,0,0.015893333901961643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,4,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,64,8,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,128,0,1,fp8,fp8,0,13.310474395751953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,float16,0,17.24293390909831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,2,128,0,1,float16,fp8,0,17.22258122762044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,float16,0,17.23213831583659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,128,0,1,fp8,fp8,0,13.38494873046875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,4,128,0,1,float16,fp8,0,18.4738032023112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,float16,0,17.849477132161457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,128,0,1,float16,fp8,0,17.707499186197918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,float16,0,9.257029215494791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,48,8,128,0,1,fp8,fp8,0,13.409978230794271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,128,0,1,float16,fp8,0,9.430229187011719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,48,128,0,1,fp8,fp8,0,7.0715681711832685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,float16,0,9.10316276550293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,128,0,1,fp8,fp8,0,6.773034413655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,2,128,0,1,float16,fp8,0,8.888762791951498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,float16,0,8.742752075195312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,128,0,1,float16,fp8,0,9.141248067220053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,4,128,0,1,fp8,fp8,0,6.7504425048828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,float16,0,8.830144246419271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,float16,0,4.532554626464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,128,0,1,float16,fp8,0,8.717909495035807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,48,8,128,0,1,fp8,fp8,0,6.867776234944661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,128,0,1,float16,fp8,0,4.549258550008138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,48,128,0,1,fp8,fp8,0,3.6643199920654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,float16,0,4.483733177185059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,128,0,1,float16,fp8,0,4.522426605224609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,2,128,0,1,fp8,fp8,0,3.507493336995443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,float16,0,4.451269467671712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,128,0,1,fp8,fp8,0,3.5098133087158203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,4,128,0,1,float16,fp8,0,4.290090560913086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,float16,0,4.311962763468425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,128,0,1,float16,fp8,0,4.527519861857097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,float16,0,2.333653291066488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,128,0,1,float16,fp8,0,2.4097439448038735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,48,8,128,0,1,fp8,fp8,0,3.5218985875447593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,48,128,0,1,fp8,fp8,0,2.3023786544799805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,float16,0,2.2839733759562173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,128,0,1,fp8,fp8,0,1.919365406036377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,2,128,0,1,float16,fp8,0,2.3102134068806968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,float16,0,2.2912853558858237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,128,0,1,fp8,fp8,0,1.9325920740763347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,4,128,0,1,float16,fp8,0,2.3008906046549478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,float16,0,2.2959893544514975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,128,0,1,float16,fp8,0,2.347263971964518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,48,8,128,0,1,fp8,fp8,0,1.9277440706888835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,128,0,1,fp8,fp8,0,7.956991831461589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,float16,0,10.147621154785156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,2,128,0,1,float16,fp8,0,9.974533081054688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,float16,0,9.97712516784668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,128,0,1,fp8,fp8,0,7.9758561452229815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,4,128,0,1,float16,fp8,0,10.66592025756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,float16,0,10.624858856201172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,128,0,1,float16,fp8,0,10.655194600423178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,float16,0,5.483472188313802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,48,8,128,0,1,fp8,fp8,0,8.045663833618164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,128,0,1,float16,fp8,0,5.421621322631836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,48,128,0,1,fp8,fp8,0,4.346122741699219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,128,0,1,fp8,fp8,0,4.056021372477214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,float16,0,5.272037188212077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,2,128,0,1,float16,fp8,0,5.098106702168782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,float16,0,5.181605339050293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,128,0,1,fp8,fp8,0,4.067242622375488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,4,128,0,1,float16,fp8,0,5.244981447855632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,float16,0,5.422880172729492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,128,0,1,float16,fp8,0,5.29311466217041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,float16,0,2.757120132446289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,128,0,1,float16,fp8,0,2.718581199645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,48,128,0,1,fp8,fp8,0,2.2686452865600586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,48,8,128,0,1,fp8,fp8,0,4.0980532964070635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,float16,0,2.5954507191975913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,128,0,1,fp8,fp8,0,2.153541405995687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,2,128,0,1,float16,fp8,0,2.604586601257324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,float16,0,2.598618666330973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,128,0,1,float16,fp8,0,2.6167306900024414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,4,128,0,1,fp8,fp8,0,2.1526400248209634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,128,0,1,fp8,fp8,0,2.1691733996073403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,float16,0,2.615450700124105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,48,8,128,0,1,float16,fp8,0,2.6208747227986655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,fp8,0,1.4856106440226238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,128,0,1,fp8,fp8,0,1.255717356999715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,48,128,0,1,float16,float16,0,1.4765706062316895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,float16,0,1.4105440775553386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,128,0,1,fp8,fp8,0,1.2085546652475994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,2,128,0,1,float16,fp8,0,1.3906666437784831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,float16,0,1.417477289835612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,128,0,1,float16,fp8,0,1.416810671488444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,4,128,0,1,fp8,fp8,0,1.2126293182373047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,float16,0,1.41538667678833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,128,0,1,float16,fp8,0,1.4176319440205891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,48,8,128,0,1,fp8,fp8,0,1.2173120180765789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,128,0,1,fp8,fp8,0,5.7789866129557295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,float16,0,7.314581553141276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,2,128,0,1,float16,fp8,0,7.230810801188151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,float16,0,7.294069290161133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,128,0,1,fp8,fp8,0,5.787845611572266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,4,128,0,1,float16,fp8,0,7.472757339477539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,float16,0,7.370725631713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,128,0,1,float16,fp8,0,7.278656005859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,float16,0,4.02837340037028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,128,0,1,fp8,fp8,0,3.1950133641560874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,48,128,0,1,float16,fp8,0,3.9388532638549805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,48,8,128,0,1,fp8,fp8,0,5.828224182128906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,float16,0,3.6589600245157876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,128,0,1,fp8,fp8,0,2.9737278620402017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,2,128,0,1,float16,fp8,0,3.640495936075846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,float16,0,3.6456212997436523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,128,0,1,fp8,fp8,0,2.990101178487142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,4,128,0,1,float16,fp8,0,3.7137279510498047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,float16,0,3.832453409830729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,128,0,1,float16,fp8,0,3.685669263203939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,float16,0,2.0547520319620767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,48,8,128,0,1,fp8,fp8,0,2.994511922200521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,128,0,1,float16,fp8,0,2.056821346282959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,48,128,0,1,fp8,fp8,0,1.6958133379618328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,float16,0,1.904688040415446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,128,0,1,fp8,fp8,0,1.5975626309712727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,2,128,0,1,float16,fp8,0,1.8707040150960286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,float16,0,1.893781344095866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,128,0,1,float16,fp8,0,1.9270505905151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,4,128,0,1,fp8,fp8,0,1.6031146049499512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,float16,0,1.93340269724528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,128,0,1,float16,fp8,0,1.9163146018981934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,float16,0,1.118186632792155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,48,8,128,0,1,fp8,fp8,0,1.607301394144694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,128,0,1,float16,fp8,0,1.1619786421457927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,48,128,0,1,fp8,fp8,0,1.0058826605478923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,float16,0,1.0450293223063152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,128,0,1,fp8,fp8,0,0.9056800206502279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,2,128,0,1,float16,fp8,0,1.0583466688791912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,float16,0,1.0518186887105305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,128,0,1,float16,fp8,0,1.0497173468271892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,4,128,0,1,fp8,fp8,0,0.9071626663208008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,float16,0,1.0599146684010823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,128,0,1,float16,fp8,0,1.0567946434020996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,48,8,128,0,1,fp8,fp8,0,0.9125493367513021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,128,0,1,fp8,fp8,0,7.860789616902669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,float16,0,9.780570983886719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,2,128,0,1,float16,fp8,0,9.910207748413086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,float16,0,9.880879720052084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,128,0,1,fp8,fp8,0,7.864442825317383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,4,128,0,1,float16,fp8,0,9.661877314249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,float16,0,10.083103815714518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,128,0,1,float16,fp8,0,10.24178695678711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,float16,0,5.389514923095703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,128,0,1,fp8,fp8,0,4.310986518859863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,48,8,128,0,1,fp8,fp8,0,7.994319915771484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,48,128,0,1,float16,fp8,0,5.4277496337890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,float16,0,4.830666542053223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,128,0,1,fp8,fp8,0,3.957765261332194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,2,128,0,1,float16,fp8,0,4.897760073343913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,float16,0,4.953781445821126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,128,0,1,float16,fp8,0,4.8743947347005205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,4,128,0,1,fp8,fp8,0,3.976282755533854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,float16,0,4.916421254475911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,128,0,1,float16,fp8,0,5.097615877787272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,float16,0,2.7094294230143228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,48,8,128,0,1,fp8,fp8,0,4.017354647318522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,128,0,1,fp8,fp8,0,2.230677286783854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,48,128,0,1,float16,fp8,0,2.748021443684896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,float16,0,2.4161599477132163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,128,0,1,float16,fp8,0,2.468170642852783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,2,128,0,1,fp8,fp8,0,2.0745226542154946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,float16,0,2.497525374094645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,128,0,1,float16,fp8,0,2.4816959698994956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,4,128,0,1,fp8,fp8,0,2.069829305013021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,float16,0,2.522325356801351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,128,0,1,float16,fp8,0,2.5168533325195312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,48,8,128,0,1,fp8,fp8,0,2.083834648132324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,float16,0,1.4204319318135579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,128,0,1,float16,fp8,0,1.4416640599568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,48,128,0,1,fp8,fp8,0,1.1909973621368408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,float16,0,1.2979040145874023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,128,0,1,float16,fp8,0,1.3073546886444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,2,128,0,1,fp8,fp8,0,1.1206133365631104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,float16,0,1.3020213445027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,128,0,1,fp8,fp8,0,1.1257440249125164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,4,128,0,1,float16,fp8,0,1.31112535794576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,float16,0,1.3265600204467773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,128,0,1,fp8,fp8,0,1.1741653283437092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,48,8,128,0,1,float16,fp8,0,1.3177119890848796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,float16,0,0.793392022450765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,128,0,1,fp8,fp8,0,0.687338670094808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,48,128,0,1,float16,fp8,0,0.8060692946116129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,float16,0,0.7350080013275146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,128,0,1,float16,fp8,0,0.7366986274719238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,2,128,0,1,fp8,fp8,0,0.64955735206604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,float16,0,0.7388479709625244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,128,0,1,fp8,fp8,0,0.6504480044047037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,4,128,0,1,float16,fp8,0,0.7417066891988119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,float16,0,0.7416319847106934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,128,0,1,fp8,fp8,0,0.6542933384577433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,48,8,128,0,1,float16,fp8,0,0.7523732980092367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,128,0,1,fp8,fp8,0,4.887749354044597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,float16,0,5.938282648722331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,2,128,0,1,float16,fp8,0,5.808069229125977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,float16,0,5.8841597239176435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,128,0,1,fp8,fp8,0,4.925061225891113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,4,128,0,1,float16,fp8,0,5.982751846313477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,float16,0,5.945242563883464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,128,0,1,float16,fp8,0,5.951541264851888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,float16,0,3.317530632019043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,128,0,1,float16,fp8,0,3.3345654805501304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,48,128,0,1,fp8,fp8,0,2.745509465535482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,48,8,128,0,1,fp8,fp8,0,4.946463902791341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,float16,0,2.9488960901896157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,128,0,1,fp8,fp8,0,2.487226645151774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,2,128,0,1,float16,fp8,0,2.9395039876302085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,float16,0,3.000997225443522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,128,0,1,fp8,fp8,0,2.4986186027526855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,4,128,0,1,float16,fp8,0,3.011925379435221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,float16,0,3.084671974182129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,128,0,1,float16,fp8,0,3.0393492380777993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,float16,0,1.7318132718404133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,48,8,128,0,1,fp8,fp8,0,2.522325356801351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,128,0,1,fp8,fp8,0,1.428559939066569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,48,128,0,1,float16,fp8,0,1.8085120519002278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,float16,0,1.5500106811523438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,128,0,1,fp8,fp8,0,1.3074933687845867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,2,128,0,1,float16,fp8,0,1.5296533902486165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,float16,0,1.53165864944458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,128,0,1,float16,fp8,0,1.5635253588358562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,4,128,0,1,fp8,fp8,0,1.31550931930542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,float16,0,1.549023946126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,128,0,1,float16,fp8,0,1.5519359906514485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,float16,0,0.913050651550293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,48,8,128,0,1,fp8,fp8,0,1.3557920455932617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,128,0,1,float16,fp8,0,0.9494720300038656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,48,128,0,1,fp8,fp8,0,0.8013652960459391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,float16,0,0.8292906284332275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,128,0,1,float16,fp8,0,0.8330079714457194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,2,128,0,1,fp8,fp8,0,0.7237813472747803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,float16,0,0.8423360188802084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,128,0,1,float16,fp8,0,0.8350133101145426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,4,128,0,1,fp8,fp8,0,0.727242628733317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,float16,0,0.8416533470153809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,128,0,1,float16,fp8,0,0.8412746588389078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,float16,0,0.5262186527252197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,48,8,128,0,1,fp8,fp8,0,0.7319093545277914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,128,0,1,float16,fp8,0,0.53275199731191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,48,128,0,1,fp8,fp8,0,0.4599146842956543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,float16,0,0.478218674659729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,128,0,1,float16,fp8,0,0.47809600830078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,2,128,0,1,fp8,fp8,0,0.42978131771087646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,float16,0,0.4815839926401774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,128,0,1,fp8,fp8,0,0.4328106641769409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,4,128,0,1,float16,fp8,0,0.48718400796254474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,float16,0,0.4899093310038249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,128,0,1,float16,fp8,0,0.48837331930796307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,48,8,128,0,1,fp8,fp8,0,0.435263991355896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,128,0,1,fp8,fp8,0,5.170405387878418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,float16,0,6.128693262736003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,2,128,0,1,float16,fp8,0,6.093599955240886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,float16,0,6.167168299357097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,128,0,1,fp8,fp8,0,5.1972001393636065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,4,128,0,1,float16,fp8,0,6.113477071126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,float16,0,6.211045583089192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,128,0,1,float16,fp8,0,6.336906433105469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,float16,0,3.4623893102010093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,128,0,1,fp8,fp8,0,2.944368044535319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,48,128,0,1,float16,fp8,0,3.4848480224609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,48,8,128,0,1,fp8,fp8,0,5.248666763305664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,float16,0,3.0309972763061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,128,0,1,fp8,fp8,0,2.5837814013163247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,2,128,0,1,float16,fp8,0,3.0107733408610025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,float16,0,3.04150390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,128,0,1,float16,fp8,0,3.1011838912963867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,4,128,0,1,fp8,fp8,0,2.6009440422058105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,float16,0,3.1156746546427407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,128,0,1,float16,fp8,0,3.1109867095947266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,float16,0,1.7632640202840169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,48,8,128,0,1,fp8,fp8,0,2.623706658681234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,128,0,1,float16,fp8,0,1.7887040774027507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,48,128,0,1,fp8,fp8,0,1.537450631459554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,float16,0,1.5389599800109863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,128,0,1,float16,fp8,0,1.5503840446472168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,2,128,0,1,fp8,fp8,0,1.3350826899210613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,float16,0,1.5596319834391277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,128,0,1,float16,fp8,0,1.5612212816874187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,4,128,0,1,fp8,fp8,0,1.3460267384847004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,float16,0,1.5697174072265625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,128,0,1,float16,fp8,0,1.5895412762959797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,48,8,128,0,1,fp8,fp8,0,1.360629399617513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,float16,0,0.9264266490936279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,128,0,1,float16,fp8,0,0.9412639935811361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,48,128,0,1,fp8,fp8,0,0.8072906335194906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,float16,0,0.8180800278981527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,128,0,1,float16,fp8,0,0.8167626857757568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,2,128,0,1,fp8,fp8,0,0.7159519990285238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,float16,0,0.8257226943969727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,128,0,1,fp8,fp8,0,0.7201706568400065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,4,128,0,1,float16,fp8,0,0.8285280068715414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,float16,0,0.834000031153361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,128,0,1,float16,fp8,0,0.837013324101766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,float16,0,0.5149066845575968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,48,8,128,0,1,fp8,fp8,0,0.7271093527475992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,128,0,1,float16,fp8,0,0.5218986670176188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,48,128,0,1,fp8,fp8,0,0.45125333468119305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,float16,0,0.4562133153279622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,128,0,1,float16,fp8,0,0.45421334107716876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,2,128,0,1,fp8,fp8,0,0.4064319928487142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,float16,0,0.4590826829274495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,128,0,1,fp8,fp8,0,0.4085226853688558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,4,128,0,1,float16,fp8,0,0.4625120162963867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,float16,0,0.46344534556070965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,128,0,1,float16,fp8,0,0.4680853287378947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,float16,0,0.30660800139109295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,128,0,1,float16,fp8,0,0.31062932809193927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,48,8,128,0,1,fp8,fp8,0,0.4124000072479248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,48,128,0,1,fp8,fp8,0,0.2736639976501465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,float16,0,0.2686613400777181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,128,0,1,fp8,fp8,0,0.24936532974243164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,2,128,0,1,float16,fp8,0,0.2687573234240214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,float16,0,0.2721760074297587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,128,0,1,fp8,fp8,0,0.2510346571604411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,4,128,0,1,float16,fp8,0,0.2712373336156209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,float16,0,0.27694932619730633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,128,0,1,float16,fp8,0,0.2765066623687744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,48,8,128,0,1,fp8,fp8,0,0.2544959982236226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,128,0,1,fp8,fp8,0,3.354602813720703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,float16,0,3.8621867497762046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,2,128,0,1,float16,fp8,0,3.89739195505778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,float16,0,3.8701705932617188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,128,0,1,fp8,fp8,0,3.3734238942464194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,4,128,0,1,float16,fp8,0,3.922133445739746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,float16,0,3.9561707178751626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,128,0,1,float16,fp8,0,3.9711360931396484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,float16,0,2.237130641937256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,128,0,1,float16,fp8,0,2.261610666910807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,48,128,0,1,fp8,fp8,0,1.936570644378662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,48,8,128,0,1,fp8,fp8,0,3.4180479049682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,float16,0,1.9433600107828777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,128,0,1,fp8,fp8,0,1.6905066172281902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,2,128,0,1,float16,fp8,0,1.9460852940877278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,float16,0,1.9592053095499675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,128,0,1,fp8,fp8,0,1.7011946042378743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,4,128,0,1,float16,fp8,0,1.9677119255065918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,float16,0,1.9902027448018391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,128,0,1,float16,fp8,0,1.9991520245869954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,float16,0,1.153439998626709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,128,0,1,float16,fp8,0,1.1712853113810222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,48,8,128,0,1,fp8,fp8,0,1.721765359242757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,48,128,0,1,fp8,fp8,0,1.0097546577453613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,float16,0,1.0113066832224529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,128,0,1,fp8,fp8,0,0.8830719788869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,2,128,0,1,float16,fp8,0,1.0101386706034343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,float16,0,1.0171146392822266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,128,0,1,float16,fp8,0,1.020576000213623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,4,128,0,1,fp8,fp8,0,0.8880586624145508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,float16,0,1.031381368637085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,128,0,1,float16,fp8,0,1.039904038111369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,float16,0,0.6125919818878174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,48,8,128,0,1,fp8,fp8,0,0.9018186728159586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,128,0,1,float16,fp8,0,0.6259413162867228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,48,128,0,1,fp8,fp8,0,0.5425706704457601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,float16,0,0.5390826861063639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,128,0,1,fp8,fp8,0,0.480186661084493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,2,128,0,1,float16,fp8,0,0.546554684638977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,float16,0,0.5469706853230795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,128,0,1,float16,fp8,0,0.5459093252817789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,4,128,0,1,fp8,fp8,0,0.4823999802271525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,float16,0,0.5548213322957357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,128,0,1,fp8,fp8,0,0.48841599623362225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,float16,0,0.349509318669637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,48,8,128,0,1,float16,fp8,0,0.5574880043665568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,128,0,1,float16,fp8,0,0.3531200091044108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,float16,0,0.3036053379376729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,48,128,0,1,fp8,fp8,0,0.3099786639213562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,128,0,1,float16,fp8,0,0.3039253354072571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,2,128,0,1,fp8,fp8,0,0.2788426677385966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,float16,0,0.3065653244654338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,128,0,1,float16,fp8,0,0.30642666419347125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,4,128,0,1,fp8,fp8,0,0.2816266616185506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,float16,0,0.3144693374633789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,128,0,1,fp8,fp8,0,0.2839733362197876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,48,8,128,0,1,float16,fp8,0,0.3128426671028137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,128,0,1,fp8,fp8,0,0.19113600254058838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,fp8,0,0.21853333711624146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,48,128,0,1,float16,float16,0,0.21380800008773804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,float16,0,0.1844586730003357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,128,0,1,fp8,fp8,0,0.17096533377965292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,2,128,0,1,float16,fp8,0,0.18730133771896362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,128,0,1,fp8,fp8,0,0.17202132940292358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,float16,0,0.1862773299217224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,4,128,0,1,float16,fp8,0,0.185914675394694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,128,0,1,fp8,fp8,0,0.17576533555984497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,fp8,0,0.1886613368988037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,48,8,128,0,1,float16,float16,0,0.18600000937779745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,128,0,1,fp8,fp8,0,3.7981974283854165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,fp8,0,4.312186559041341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,2,128,0,1,float16,float16,0,4.33676815032959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,float16,0,4.330533345540364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,128,0,1,fp8,fp8,0,3.8145761489868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,4,128,0,1,float16,fp8,0,4.334010759989421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,float16,0,4.423856099446614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,float16,0,2.482405344645182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,128,0,1,float16,fp8,0,2.521205266316732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,128,0,1,fp8,fp8,0,3.8784265518188477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,48,8,128,0,1,float16,fp8,0,4.428213437398274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,48,128,0,1,fp8,fp8,0,2.1925600369771323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,float16,0,2.1428426106770835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,128,0,1,float16,fp8,0,2.151306629180908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,2,128,0,1,fp8,fp8,0,1.8893973032633464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,float16,0,2.149888038635254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,128,0,1,float16,fp8,0,2.172287940979004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,4,128,0,1,fp8,fp8,0,1.9027679761250813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,float16,0,2.1914240519205728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,float16,0,1.270405371983846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,128,0,1,float16,fp8,0,2.1993813514709473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,128,0,1,float16,fp8,0,1.2889706293741863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,48,8,128,0,1,fp8,fp8,0,1.9290080070495605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,48,128,0,1,fp8,fp8,0,1.1264106432596843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,float16,0,1.090831995010376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,128,0,1,fp8,fp8,0,0.9700799783070883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,2,128,0,1,float16,fp8,0,1.100874662399292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,float16,0,1.1085440317789714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,128,0,1,float16,fp8,0,1.1044212977091472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,4,128,0,1,fp8,fp8,0,0.9785493214925131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,float16,0,1.1242612997690837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,float16,0,0.664138674736023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,128,0,1,float16,fp8,0,1.1332106590270996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,128,0,1,float16,fp8,0,0.6772212982177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,48,8,128,0,1,fp8,fp8,0,0.9902559916178385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,48,128,0,1,fp8,fp8,0,0.5949600140253702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,float16,0,0.5766559839248657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,128,0,1,float16,fp8,0,0.5782560110092163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,2,128,0,1,fp8,fp8,0,0.5150719881057739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,float16,0,0.5828693310419718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,128,0,1,float16,fp8,0,0.5863893429438273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,4,128,0,1,fp8,fp8,0,0.5181333223978678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,float16,0,0.5882933139801025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,float16,0,0.3612373272577922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,128,0,1,float16,fp8,0,0.36954132715861004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,128,0,1,fp8,fp8,0,0.5246986548105875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,48,8,128,0,1,float16,fp8,0,0.5956693490346273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,48,128,0,1,fp8,fp8,0,0.32598400115966797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,float16,0,0.3131200075149536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,128,0,1,float16,fp8,0,0.3163253267606099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,2,128,0,1,fp8,fp8,0,0.28573866685231525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,float16,0,0.31749866406122845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,128,0,1,float16,fp8,0,0.31774399677912396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,4,128,0,1,fp8,fp8,0,0.287882665793101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,float16,0,0.3237280050913493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,128,0,1,float16,fp8,0,0.32611199220021564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,float16,0,0.21223467588424683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,48,8,128,0,1,fp8,fp8,0,0.2905706763267517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,128,0,1,float16,fp8,0,0.21407467126846313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,48,128,0,1,fp8,fp8,0,0.19128533204396567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,float16,0,0.18042665719985962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,128,0,1,float16,fp8,0,0.17908799648284912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,2,128,0,1,fp8,fp8,0,0.16916267077128092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,float16,0,0.18082666397094727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,128,0,1,float16,fp8,0,0.18226132790247598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,4,128,0,1,fp8,fp8,0,0.17082667350769043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,float16,0,0.18523200352986655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,float16,0,0.13116266330083212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,128,0,1,fp8,fp8,0,0.17371733983357748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,48,8,128,0,1,float16,fp8,0,0.18576000134150186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,128,0,1,float16,fp8,0,0.13359999656677246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,48,128,0,1,fp8,fp8,0,0.12300266822179158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,float16,0,0.11752532919247945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,128,0,1,float16,fp8,0,0.11771200100580852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,2,128,0,1,fp8,fp8,0,0.11127466956774394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,float16,0,0.11628266175587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,128,0,1,float16,fp8,0,0.11779733498891194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,4,128,0,1,fp8,fp8,0,0.11012799541155498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,float16,0,0.11745066444079082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,128,0,1,float16,fp8,0,0.11764267086982727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,48,8,128,0,1,fp8,fp8,0,0.11214400331179301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,128,0,1,fp8,fp8,0,2.595786730448405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,float16,0,2.9260533650716147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,2,128,0,1,float16,fp8,0,2.9439786275227866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,float16,0,2.9617652893066406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,128,0,1,fp8,fp8,0,2.606325308481852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,4,128,0,1,float16,fp8,0,2.9654614130655923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,float16,0,3.0429226557413735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,float16,0,1.7065332730611165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,128,0,1,fp8,fp8,0,2.6406613985697427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,48,8,128,0,1,float16,fp8,0,3.029578526814779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,128,0,1,float16,fp8,0,1.729728062947591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,48,128,0,1,fp8,fp8,0,1.520095984141032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,float16,0,1.4590986569722493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,128,0,1,float16,fp8,0,1.4649866422017415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,2,128,0,1,fp8,fp8,0,1.2956480185190837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,float16,0,1.4766453107198079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,128,0,1,float16,fp8,0,1.4867572784423828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,4,128,0,1,fp8,fp8,0,1.3101812998453777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,float16,0,1.5042667388916016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,128,0,1,fp8,fp8,0,1.3239413102467854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,48,8,128,0,1,float16,fp8,0,1.515973409016927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,float16,0,0.875653346379598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,128,0,1,float16,fp8,0,0.8910666306813558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,48,128,0,1,fp8,fp8,0,0.784272034962972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,float16,0,0.7541493574778239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,128,0,1,float16,fp8,0,0.7549493312835693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,2,128,0,1,fp8,fp8,0,0.6710826555887858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,float16,0,0.7627039750417074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,128,0,1,float16,fp8,0,0.7640480200449625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,4,128,0,1,fp8,fp8,0,0.6764480272928873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,float16,0,0.7769866784413656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,float16,0,0.4620800018310547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,128,0,1,fp8,fp8,0,0.6854879856109619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,48,8,128,0,1,float16,fp8,0,0.7815679709116617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,128,0,1,float16,fp8,0,0.47217599550882977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,float16,0,0.4002186854680379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,48,128,0,1,fp8,fp8,0,0.4181813398996989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,128,0,1,float16,fp8,0,0.4034453233083089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,2,128,0,1,fp8,fp8,0,0.3598293463389079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,float16,0,0.405349334081014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,128,0,1,fp8,fp8,0,0.3635733524958293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,4,128,0,1,float16,fp8,0,0.40666667620340985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,float16,0,0.4113813241322835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,float16,0,0.2560799916585286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,128,0,1,float16,fp8,0,0.4150400161743164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,48,8,128,0,1,fp8,fp8,0,0.3672853310902913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,128,0,1,float16,fp8,0,0.2615306576093038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,48,128,0,1,fp8,fp8,0,0.23266667127609253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,128,0,1,fp8,fp8,0,0.20360000928243002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,float16,0,0.21921600898106894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,2,128,0,1,float16,fp8,0,0.21973333756128946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,float16,0,0.22099733352661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,128,0,1,fp8,fp8,0,0.20508267482121786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,4,128,0,1,float16,fp8,0,0.2246506611506144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,float16,0,0.22764267524083456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,float16,0,0.1530080040295919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,128,0,1,float16,fp8,0,0.22992000977198282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,128,0,1,float16,fp8,0,0.15413866440455118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,48,8,128,0,1,fp8,fp8,0,0.20775467157363892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,48,128,0,1,fp8,fp8,0,0.1383946637312571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,float16,0,0.1267039974530538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,128,0,1,float16,fp8,0,0.1281706690788269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,2,128,0,1,fp8,fp8,0,0.11761066317558289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,float16,0,0.12866133451461792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,128,0,1,float16,fp8,0,0.12827199697494507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,4,128,0,1,fp8,fp8,0,0.11797866225242615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,float16,0,0.12987200419108072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,128,0,1,float16,fp8,0,0.1320693294207255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,float16,0,0.09310932954152425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,48,8,128,0,1,fp8,fp8,0,0.12396799524625142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,128,0,1,float16,fp8,0,0.09318932890892029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,48,128,0,1,fp8,fp8,0,0.09135466814041138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,float16,0,0.08684266606966655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,128,0,1,float16,fp8,0,0.08686400453249614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,2,128,0,1,fp8,fp8,0,0.08266133566697438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,float16,0,0.08705066641171773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,128,0,1,float16,fp8,0,0.08892266949017842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,4,128,0,1,fp8,fp8,0,0.08297066887219746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,float16,0,0.08739200234413147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,128,0,1,float16,fp8,0,0.08701333403587341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,48,8,128,0,1,fp8,fp8,0,0.08278400202592213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,float16,0,3.268517176310221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,128,0,1,float16,fp8,0,3.2855520248413086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,2,128,0,1,fp8,fp8,0,3.1864426930745444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,float16,0,3.28110408782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,128,0,1,fp8,fp8,0,3.27239990234375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,4,128,0,1,float16,fp8,0,3.30403741200765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,float16,0,3.3867200215657554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,128,0,1,float16,fp8,0,3.3917919794718423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,float16,0,1.914672056833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,128,0,1,float16,fp8,0,1.8925280570983887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,48,128,0,1,fp8,fp8,0,1.8326026598612468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,48,8,128,0,1,fp8,fp8,0,3.270853360493978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,float16,0,1.5821866989135742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,128,0,1,float16,fp8,0,1.5947839419047039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,2,128,0,1,fp8,fp8,0,1.5310452779134114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,float16,0,1.5918827056884766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,128,0,1,float16,fp8,0,1.6136746406555176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,4,128,0,1,fp8,fp8,0,1.508837381998698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,float16,0,1.6409173011779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,float16,0,0.9535786310831705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,128,0,1,float16,fp8,0,0.9416639804840088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,128,0,1,float16,fp8,0,1.6464266777038574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,48,8,128,0,1,fp8,fp8,0,1.5960267384847004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,float16,0,0.802735964457194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,48,128,0,1,fp8,fp8,0,0.9141653378804525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,128,0,1,float16,fp8,0,0.803541342417399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,2,128,0,1,fp8,fp8,0,0.7473226388295492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,float16,0,0.8079626560211182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,128,0,1,float16,fp8,0,0.8096160093943278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,4,128,0,1,fp8,fp8,0,0.7559146881103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,float16,0,0.8313813209533691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,128,0,1,float16,fp8,0,0.8302186330159506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,float16,0,0.4932639996210734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,48,8,128,0,1,fp8,fp8,0,0.7840960025787354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,128,0,1,float16,fp8,0,0.48137064774831134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,48,128,0,1,fp8,fp8,0,0.46962666511535645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,float16,0,0.41313600540161133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,128,0,1,float16,fp8,0,0.4134879906972249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,2,128,0,1,fp8,fp8,0,0.3845653136571248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,float16,0,0.4162079890569051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,128,0,1,float16,fp8,0,0.41621867815653485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,4,128,0,1,fp8,fp8,0,0.38707200686136883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,float16,0,0.4293706814448039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,128,0,1,float16,fp8,0,0.426581343015035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,float16,0,0.2613333264986674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,48,8,128,0,1,fp8,fp8,0,0.4030880133310954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,128,0,1,float16,fp8,0,0.25489600499471027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,48,128,0,1,fp8,fp8,0,0.24661866823832193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,float16,0,0.21997867027918497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,128,0,1,float16,fp8,0,0.21941866477330527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,2,128,0,1,fp8,fp8,0,0.20466132958730063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,float16,0,0.22272533178329468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,128,0,1,float16,fp8,0,0.22141865889231363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,4,128,0,1,fp8,fp8,0,0.2059626579284668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,fp8,0,0.22758400440216064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,128,0,1,float16,float16,0,0.22891199588775635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,48,8,128,0,1,fp8,fp8,0,0.2141866683959961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,float16,0,0.14620799819628397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,128,0,1,float16,fp8,0,0.14272000392278036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,48,128,0,1,fp8,fp8,0,0.13657599687576294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,float16,0,0.11866133411725362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,128,0,1,fp8,fp8,0,0.10984533031781514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,2,128,0,1,float16,fp8,0,0.11755733688672383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,float16,0,0.11989333232243855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,128,0,1,fp8,fp8,0,0.11209066708882649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,float16,0,0.12333333492279053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,4,128,0,1,float16,fp8,0,0.12035199999809265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,128,0,1,float16,fp8,0,0.12371733784675598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,48,8,128,0,1,fp8,fp8,0,0.11818666259447734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,float16,0,0.08494399984677632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,128,0,1,fp8,fp8,0,0.08257066706816356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,48,128,0,1,float16,fp8,0,0.082997332016627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,float16,0,0.0699839989344279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,128,0,1,float16,fp8,0,0.07051200171311696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,2,128,0,1,fp8,fp8,0,0.06435733536879222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,float16,0,0.07122666637102763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,128,0,1,float16,fp8,0,0.07035199801127116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,float16,0,0.07206400235493977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,4,128,0,1,fp8,fp8,0,0.06444799900054932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,128,0,1,float16,fp8,0,0.07096000015735626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,48,8,128,0,1,fp8,fp8,0,0.06434133152167003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,float16,0,0.04980266590913137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,128,0,1,float16,fp8,0,0.050026665131251015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,48,128,0,1,fp8,fp8,0,0.04618666569391886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,float16,0,0.04685866832733154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,128,0,1,float16,fp8,0,0.048341333866119385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,2,128,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,float16,0,0.04799999793370565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,128,0,1,float16,fp8,0,0.04773866633574168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,4,128,0,1,fp8,fp8,0,0.04386133452256521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,float16,0,0.04795733094215393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,128,0,1,float16,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,48,8,128,0,1,fp8,fp8,0,0.04409599800904592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,128,0,1,float16,float16,0,2.8592214584350586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,128,0,1,float16,fp8,0,2.8974345525105796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,2,128,0,1,fp8,fp8,0,2.859247843424479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,128,0,1,float16,float16,0,2.8887573877970376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,128,0,1,float16,fp8,0,2.895392100016276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,4,128,0,1,fp8,fp8,0,2.9349120457967124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,128,0,1,float16,float16,0,2.9556105931599936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,128,0,1,float16,fp8,0,2.972480138142904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,128,0,1,float16,float16,0,1.6816213925679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,128,0,1,float16,fp8,0,1.6688906351725261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,48,128,0,1,fp8,fp8,0,1.647509256998698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,48,8,128,0,1,fp8,fp8,0,2.8986291885375977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,128,0,1,float16,float16,0,1.383504072825114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,128,0,1,float16,fp8,0,1.3973120053609211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,2,128,0,1,fp8,fp8,0,1.404549280802409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,128,0,1,float16,float16,0,1.3956960042317708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,128,0,1,fp8,fp8,0,1.3677760759989421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,4,128,0,1,float16,fp8,0,1.4029067357381184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,128,0,1,float16,float16,0,1.4296107292175293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,128,0,1,float16,fp8,0,1.442431926727295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,128,0,1,float16,float16,0,0.8348266283671061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,128,0,1,float16,fp8,0,0.821114699045817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,48,8,128,0,1,fp8,fp8,0,1.4353334108988445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,128,0,1,float16,fp8,0,0.7013920148213705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,128,0,1,float16,float16,0,0.7009013493855795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,48,128,0,1,fp8,fp8,0,0.8201173146565756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,2,128,0,1,fp8,fp8,0,0.6597760121027628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,128,0,1,float16,float16,0,0.7046026388804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,128,0,1,float16,fp8,0,0.7070879936218262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,4,128,0,1,fp8,fp8,0,0.6666719913482666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,128,0,1,float16,float16,0,0.7235466639200846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,128,0,1,float16,fp8,0,0.7249333063761393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,48,8,128,0,1,fp8,fp8,0,0.6967946688334147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,128,0,1,fp8,fp8,0,0.4214560190836589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,128,0,1,float16,fp8,0,0.42185068130493164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,48,128,0,1,float16,float16,0,0.43188798427581787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,128,0,1,float16,fp8,0,0.36319466431935626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,128,0,1,fp8,fp8,0,0.33772798379262287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,2,128,0,1,float16,float16,0,0.3598613341649373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,128,0,1,float16,float16,0,0.363045334815979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,128,0,1,fp8,fp8,0,0.34116800626118976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,4,128,0,1,float16,fp8,0,0.36311999956766766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,128,0,1,float16,float16,0,0.37281068166097003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,128,0,1,fp8,fp8,0,0.3561546802520752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,48,8,128,0,1,float16,fp8,0,0.37214934825897217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,128,0,1,float16,float16,0,0.2300800085067749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,128,0,1,float16,fp8,0,0.22362667322158813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,48,128,0,1,fp8,fp8,0,0.222053329149882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,128,0,1,float16,float16,0,0.1923733353614807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,128,0,1,float16,fp8,0,0.19325866301854452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,2,128,0,1,fp8,fp8,0,0.1811466614405314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,128,0,1,float16,float16,0,0.19236266613006592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,128,0,1,float16,fp8,0,0.19400533040364584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,4,128,0,1,fp8,fp8,0,0.18127999703089395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,128,0,1,float16,float16,0,0.19797333081563315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,128,0,1,float16,fp8,0,0.19825067122777304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,48,8,128,0,1,fp8,fp8,0,0.18990933895111084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,128,0,1,float16,float16,0,0.12589333454767862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,128,0,1,float16,fp8,0,0.12258133292198181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,48,128,0,1,fp8,fp8,0,0.12215999762217204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,128,0,1,float16,float16,0,0.10327999790509541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,128,0,1,float16,fp8,0,0.10351999600728352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,2,128,0,1,fp8,fp8,0,0.0972213347752889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,128,0,1,float16,float16,0,0.1049013336499532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,128,0,1,float16,fp8,0,0.10443733135859172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,4,128,0,1,fp8,fp8,0,0.09923733274141948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,128,0,1,float16,float16,0,0.10738133390744527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,128,0,1,float16,fp8,0,0.1079200009504954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,48,8,128,0,1,fp8,fp8,0,0.10487467050552368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,128,0,1,float16,float16,0,0.07297599812348683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,128,0,1,float16,fp8,0,0.0717386653025945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,48,128,0,1,fp8,fp8,0,0.0745119998852412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,128,0,1,float16,float16,0,0.06010666489601135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,128,0,1,float16,fp8,0,0.06188266475995382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,2,128,0,1,fp8,fp8,0,0.05619733532269796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,128,0,1,float16,float16,0,0.06196799874305725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,128,0,1,float16,fp8,0,0.062047998110453285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,4,128,0,1,fp8,fp8,0,0.05569600065549215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,128,0,1,float16,float16,0,0.062421331803003945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,128,0,1,float16,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,128,0,1,float16,float16,0,0.04378133515516917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,48,8,128,0,1,fp8,fp8,0,0.057189335425694786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,128,0,1,fp8,fp8,0,0.04152533411979675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,128,0,1,float16,float16,0,0.04165866722663244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,128,0,1,float16,fp8,0,0.04176533222198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,48,128,0,1,float16,fp8,0,0.04430399835109711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,128,0,1,float16,float16,0,0.04053866614898046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,2,128,0,1,fp8,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,128,0,1,float16,fp8,0,0.04155199974775314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,4,128,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,128,0,1,float16,fp8,0,0.04177600145339966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,48,8,128,0,1,fp8,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,128,0,1,float16,float16,0,0.03010133405526479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,128,0,1,float16,fp8,0,0.0312266672650973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,48,128,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,128,0,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,128,0,1,float16,float16,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,128,0,1,fp8,fp8,0,0.027679999669392902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,2,128,0,1,float16,fp8,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,128,0,1,float16,fp8,0,0.02921066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,4,128,0,1,fp8,fp8,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,128,0,1,float16,float16,0,0.02916266769170761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,128,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,48,8,128,0,1,float16,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,128,0,1,float16,float16,0,1.2697813510894775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,128,0,1,float16,fp8,0,1.2931466897328694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,2,128,0,1,fp8,fp8,0,1.2866453329722087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,128,0,1,float16,float16,0,1.2758933703104656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,128,0,1,float16,fp8,0,1.288256009419759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,4,128,0,1,fp8,fp8,0,1.278106689453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,128,0,1,float16,float16,0,1.3259092966715496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,128,0,1,float16,fp8,0,1.3363787333170574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,128,0,1,float16,float16,0,0.7669653097788492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,128,0,1,float16,fp8,0,0.7479840119679769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,48,128,0,1,fp8,fp8,0,0.7678453127543131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,48,8,128,0,1,fp8,fp8,0,1.3139306704203289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,128,0,1,float16,float16,0,0.6416000127792358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,128,0,1,float16,fp8,0,0.6444746653238932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,2,128,0,1,fp8,fp8,0,0.6054506699244181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,128,0,1,float16,float16,0,0.6458933353424072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,128,0,1,float16,fp8,0,0.6431306600570679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,4,128,0,1,fp8,fp8,0,0.6114826599756876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,128,0,1,float16,float16,0,0.6650186777114868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,128,0,1,float16,fp8,0,0.6597333351771036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,128,0,1,float16,float16,0,0.39709333578745526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,48,8,128,0,1,fp8,fp8,0,0.6402186552683512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,128,0,1,float16,fp8,0,0.3885813156763713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,128,0,1,float16,float16,0,0.3316319982210795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,48,128,0,1,fp8,fp8,0,0.3961493174235026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,128,0,1,float16,fp8,0,0.33236799637476605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,2,128,0,1,fp8,fp8,0,0.3129013379414876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,128,0,1,float16,float16,0,0.33313600222269696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,128,0,1,float16,fp8,0,0.33459198474884033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,4,128,0,1,fp8,fp8,0,0.3160533308982849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,128,0,1,float16,float16,0,0.34436265627543133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,128,0,1,float16,fp8,0,0.3421013355255127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,128,0,1,float16,float16,0,0.21265600124994913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,48,8,128,0,1,fp8,fp8,0,0.32917867104212445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,128,0,1,float16,fp8,0,0.20683199167251587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,48,128,0,1,fp8,fp8,0,0.20950400829315186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,128,0,1,float16,float16,0,0.1775839924812317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,128,0,1,float16,fp8,0,0.17718400557835898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,2,128,0,1,fp8,fp8,0,0.1660160024960836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,128,0,1,float16,float16,0,0.1794346570968628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,128,0,1,float16,fp8,0,0.1778986652692159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,4,128,0,1,fp8,fp8,0,0.16740800937016806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,128,0,1,float16,float16,0,0.1839253306388855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,128,0,1,float16,fp8,0,0.18360533316930136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,48,8,128,0,1,fp8,fp8,0,0.17595734198888144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,128,0,1,float16,float16,0,0.1197119951248169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,128,0,1,fp8,fp8,0,0.11578133702278137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,48,128,0,1,float16,fp8,0,0.11572800079981486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,128,0,1,float16,float16,0,0.09807466467221577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,128,0,1,fp8,fp8,0,0.09108266234397888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,2,128,0,1,float16,fp8,0,0.09707732995351155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,128,0,1,float16,float16,0,0.09924800197283427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,128,0,1,float16,fp8,0,0.09925867120424907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,4,128,0,1,fp8,fp8,0,0.09268266956011455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,128,0,1,float16,float16,0,0.10146133104960124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,128,0,1,float16,float16,0,0.06594666838645935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,128,0,1,float16,fp8,0,0.10100266337394714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,48,8,128,0,1,fp8,fp8,0,0.09827199578285217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,128,0,1,float16,float16,0,0.056143999099731445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,128,0,1,float16,fp8,0,0.0643093337615331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,48,128,0,1,fp8,fp8,0,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,128,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,2,128,0,1,fp8,fp8,0,0.05180266499519348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,128,0,1,float16,float16,0,0.05707733333110809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,128,0,1,float16,fp8,0,0.055829331278800964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,4,128,0,1,fp8,fp8,0,0.05230399966239929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,128,0,1,float16,float16,0,0.057631999254226685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,128,0,1,float16,fp8,0,0.056015998125076294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,128,0,1,float16,float16,0,0.039664000272750854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,48,8,128,0,1,fp8,fp8,0,0.05204799771308899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,128,0,1,float16,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,48,128,0,1,fp8,fp8,0,0.038176000118255615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,128,0,1,float16,float16,0,0.038560000558694206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,128,0,1,float16,fp8,0,0.03743999948104223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,2,128,0,1,fp8,fp8,0,0.03475199888149897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,128,0,1,float16,float16,0,0.03739733248949051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,128,0,1,float16,fp8,0,0.0391146664818128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,4,128,0,1,fp8,fp8,0,0.034330666065216064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,128,0,1,float16,float16,0,0.03757333258787791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,128,0,1,float16,fp8,0,0.039690665900707245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,48,8,128,0,1,fp8,fp8,0,0.03583999971548716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,128,0,1,float16,float16,0,0.02808533360560735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,128,0,1,float16,fp8,0,0.02881066749493281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,48,128,0,1,fp8,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,128,0,1,float16,float16,0,0.027104000250498455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,128,0,1,float16,fp8,0,0.027082666754722595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,2,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,128,0,1,float16,float16,0,0.02699733277161916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,128,0,1,float16,fp8,0,0.026506667335828144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,4,128,0,1,fp8,fp8,0,0.025909334421157837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,128,0,1,float16,float16,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,128,0,1,float16,fp8,0,0.0266239990790685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,48,8,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,128,0,1,float16,float16,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,128,0,1,float16,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,48,128,0,1,fp8,fp8,0,0.02162666618824005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,128,0,1,float16,float16,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,128,0,1,float16,fp8,0,0.023285334308942158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,2,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,128,0,1,float16,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,128,0,1,float16,float16,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,128,0,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,4,128,0,1,fp8,fp8,0,0.021536000072956085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,48,8,128,0,1,fp8,fp8,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,128,0,1,float16,float16,0,0.6903786659240723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,128,0,1,float16,fp8,0,0.6915253003438314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,128,0,1,float16,float16,0,0.6925813357035319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,2,128,0,1,fp8,fp8,0,0.6627626816431681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,128,0,1,float16,fp8,0,0.6959839661916097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,4,128,0,1,fp8,fp8,0,0.6735040346781412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,128,0,1,float16,float16,0,0.7081279754638672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,128,0,1,float16,fp8,0,0.7071359952290853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,128,0,1,float16,float16,0,0.4161119858423869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,128,0,1,float16,fp8,0,0.40889068444569904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,48,128,0,1,fp8,fp8,0,0.4195359945297241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,48,8,128,0,1,fp8,fp8,0,0.693125327428182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,128,0,1,float16,float16,0,0.351637323697408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,128,0,1,float16,fp8,0,0.35260268052419025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,2,128,0,1,fp8,fp8,0,0.33739733695983887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,128,0,1,float16,float16,0,0.35363201300303143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,128,0,1,float16,fp8,0,0.35395201047261554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,4,128,0,1,fp8,fp8,0,0.3414613405863444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,128,0,1,float16,float16,0,0.3624266783396403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,128,0,1,float16,fp8,0,0.362773338953654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,48,8,128,0,1,fp8,fp8,0,0.35278932253519696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,128,0,1,float16,float16,0,0.21932266155878702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,128,0,1,float16,fp8,0,0.21543467044830322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,48,128,0,1,fp8,fp8,0,0.21965332825978598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,128,0,1,float16,float16,0,0.18553600708643594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,128,0,1,float16,fp8,0,0.18578133980433145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,2,128,0,1,fp8,fp8,0,0.17848533391952515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,128,0,1,float16,float16,0,0.18699200948079428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,128,0,1,float16,fp8,0,0.1869866649309794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,4,128,0,1,fp8,fp8,0,0.17868266503016153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,128,0,1,float16,float16,0,0.19278399149576822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,128,0,1,float16,fp8,0,0.1902079979578654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,48,8,128,0,1,fp8,fp8,0,0.18573333819707236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,128,0,1,float16,float16,0,0.11954666177431743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,128,0,1,float16,fp8,0,0.1164959967136383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,48,128,0,1,fp8,fp8,0,0.12172800302505493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,128,0,1,float16,float16,0,0.10123200217882793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,128,0,1,float16,fp8,0,0.10103999574979146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,2,128,0,1,fp8,fp8,0,0.09546132882436116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,128,0,1,float16,float16,0,0.10113599896430969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,128,0,1,float16,fp8,0,0.101583997408549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,4,128,0,1,fp8,fp8,0,0.09696533282597859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,128,0,1,float16,float16,0,0.10476799805959065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,128,0,1,float16,fp8,0,0.1039573351542155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,48,8,128,0,1,fp8,fp8,0,0.10221866766611735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,128,0,1,float16,float16,0,0.06856533388296764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,128,0,1,float16,fp8,0,0.06818133095900218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,48,128,0,1,fp8,fp8,0,0.07177599767843883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,128,0,1,float16,float16,0,0.06000000238418579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,128,0,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,2,128,0,1,fp8,fp8,0,0.05453866720199585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,128,0,1,float16,float16,0,0.06029333174228668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,128,0,1,float16,fp8,0,0.060362666845321655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,4,128,0,1,fp8,fp8,0,0.05450133482615153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,128,0,1,float16,float16,0,0.059248000383377075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,128,0,1,float16,fp8,0,0.05942399799823761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,128,0,1,float16,float16,0,0.03809066613515218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,48,8,128,0,1,fp8,fp8,0,0.0553653339544932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,128,0,1,float16,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,48,128,0,1,fp8,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,128,0,1,float16,float16,0,0.03545066714286804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,128,0,1,float16,fp8,0,0.03748266647259394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,2,128,0,1,fp8,fp8,0,0.03463999927043915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,128,0,1,float16,float16,0,0.03613866617282232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,128,0,1,float16,fp8,0,0.03756800045569738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,4,128,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,128,0,1,float16,fp8,0,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,128,0,1,float16,float16,0,0.02773333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,128,0,1,float16,fp8,0,0.02847466617822647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,48,8,128,0,1,fp8,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,48,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,128,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,128,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,2,128,0,1,fp8,fp8,0,0.026154667139053345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,128,0,1,float16,float16,0,0.02737066646416982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,128,0,1,float16,fp8,0,0.02792000025510788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,4,128,0,1,fp8,fp8,0,0.025839999318122864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,128,0,1,float16,float16,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,128,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,48,8,128,0,1,fp8,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,128,0,1,float16,float16,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,128,0,1,float16,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,48,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,128,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,2,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,128,0,1,float16,float16,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,128,0,1,float16,fp8,0,0.021183999876181286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,4,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,128,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,48,8,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,128,0,1,float16,fp8,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,48,128,0,1,fp8,fp8,0,0.017557332913080852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,2,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,128,0,1,float16,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,4,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,128,0,1,float16,float16,0,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,128,0,1,float16,fp8,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,48,8,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,128,0,1,float16,float16,0,0.44223467508951825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,128,0,1,float16,fp8,0,0.4439786672592163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,128,0,1,float16,float16,0,0.44466666380564374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,2,128,0,1,fp8,fp8,0,0.42208532492319745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,128,0,1,float16,fp8,0,0.44435731569925946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,4,128,0,1,fp8,fp8,0,0.4244906504948934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,128,0,1,float16,float16,0,0.45255998770395917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,128,0,1,float16,fp8,0,0.4531466563542684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,128,0,1,float16,float16,0,0.2635146578152974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,128,0,1,float16,fp8,0,0.2578879992167155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,48,8,128,0,1,fp8,fp8,0,0.4370453357696533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,48,128,0,1,fp8,fp8,0,0.26105600595474243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,128,0,1,float16,float16,0,0.23036799828211466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,128,0,1,float16,fp8,0,0.23065600792566934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,2,128,0,1,fp8,fp8,0,0.21930134296417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,128,0,1,float16,float16,0,0.23106133937835693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,128,0,1,float16,fp8,0,0.23040000597635904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,4,128,0,1,fp8,fp8,0,0.21946666638056436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,128,0,1,float16,float16,0,0.235317329565684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,128,0,1,float16,fp8,0,0.23412267367045084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,48,8,128,0,1,fp8,fp8,0,0.22671467065811157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,128,0,1,float16,float16,0,0.14120533068974814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,128,0,1,float16,fp8,0,0.13758933544158936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,48,128,0,1,fp8,fp8,0,0.13942399621009827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,128,0,1,float16,float16,0,0.12359467148780823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,128,0,1,float16,fp8,0,0.12376532951990764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,2,128,0,1,fp8,fp8,0,0.11635200182596843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,128,0,1,float16,float16,0,0.12378666798273723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,128,0,1,float16,fp8,0,0.1241333285967509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,4,128,0,1,fp8,fp8,0,0.11719466249148051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,128,0,1,float16,float16,0,0.12591999769210815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,128,0,1,float16,fp8,0,0.12628266215324402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,48,8,128,0,1,fp8,fp8,0,0.12194666266441345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,128,0,1,float16,float16,0,0.07853333155314128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,128,0,1,float16,fp8,0,0.07658666869004567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,48,128,0,1,fp8,fp8,0,0.08097066481908162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,128,0,1,float16,float16,0,0.07062399884064992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,128,0,1,float16,fp8,0,0.06818666557470958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,2,128,0,1,fp8,fp8,0,0.06449066599210103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,128,0,1,float16,float16,0,0.07005333403746287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,128,0,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,4,128,0,1,fp8,fp8,0,0.06404800216356914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,128,0,1,float16,float16,0,0.07055999835332234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,128,0,1,float16,fp8,0,0.070592001080513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,48,8,128,0,1,fp8,fp8,0,0.06524266799290974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,128,0,1,float16,float16,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,128,0,1,float16,fp8,0,0.04561600089073181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,48,128,0,1,fp8,fp8,0,0.04417600234349569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,128,0,1,float16,float16,0,0.042549331982930504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,128,0,1,float16,fp8,0,0.04385066529115041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,2,128,0,1,fp8,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,128,0,1,float16,float16,0,0.04338666796684265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,128,0,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,4,128,0,1,fp8,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,128,0,1,float16,float16,0,0.043978666265805565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,128,0,1,float16,fp8,0,0.042965332667032875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,48,8,128,0,1,fp8,fp8,0,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,128,0,1,float16,float16,0,0.0313226655125618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,128,0,1,float16,fp8,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,48,128,0,1,fp8,fp8,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,128,0,1,float16,float16,0,0.028981332977612812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,128,0,1,float16,fp8,0,0.02863999952872594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,2,128,0,1,fp8,fp8,0,0.028031999866167705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,128,0,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,128,0,1,float16,fp8,0,0.029253333806991577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,4,128,0,1,fp8,fp8,0,0.028570666909217834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,128,0,1,float16,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,48,8,128,0,1,fp8,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,128,0,1,float16,float16,0,0.02203733225663503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,128,0,1,float16,fp8,0,0.023029332359631855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,48,128,0,1,fp8,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,128,0,1,float16,float16,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,128,0,1,float16,fp8,0,0.023552000522613525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,2,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,4,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,128,0,1,float16,float16,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,48,8,128,0,1,fp8,fp8,0,0.02197866638501485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,48,128,0,1,fp8,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,2,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,4,128,0,1,fp8,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,128,0,1,float16,fp8,0,0.017722666263580322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,48,8,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,48,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,2,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,128,0,1,float16,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,4,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,128,0,1,float16,float16,0,0.016623999923467636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,48,8,128,0,1,fp8,fp8,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,128,0,1,fp8,fp8,0,0.30898133913675946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,128,0,1,float16,float16,0,0.3226240078608195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,2,128,0,1,float16,fp8,0,0.32310400406519574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,128,0,1,float16,float16,0,0.3235893249511719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,128,0,1,fp8,fp8,0,0.30631999174753827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,128,0,1,float16,float16,0,0.3272106647491455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,4,128,0,1,float16,fp8,0,0.3221706748008728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,128,0,1,float16,fp8,0,0.3277066747347514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,128,0,1,float16,float16,0,0.18538665771484375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,48,8,128,0,1,fp8,fp8,0,0.31621867418289185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,128,0,1,fp8,fp8,0,0.18268799781799316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,48,128,0,1,float16,fp8,0,0.18307733535766602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,128,0,1,float16,float16,0,0.16954666376113892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,128,0,1,float16,fp8,0,0.1688106656074524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,2,128,0,1,fp8,fp8,0,0.15873600045839945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,128,0,1,float16,float16,0,0.17100799083709717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,128,0,1,float16,fp8,0,0.16871466239293417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,4,128,0,1,fp8,fp8,0,0.160863995552063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,128,0,1,float16,float16,0,0.1728586753209432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,128,0,1,float16,fp8,0,0.1701493263244629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,128,0,1,float16,float16,0,0.0993280013402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,48,8,128,0,1,fp8,fp8,0,0.16663466890652975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,128,0,1,float16,fp8,0,0.09943999846776326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,48,128,0,1,fp8,fp8,0,0.10126933455467224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,128,0,1,float16,float16,0,0.09325866897900899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,128,0,1,float16,fp8,0,0.0923466682434082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,2,128,0,1,fp8,fp8,0,0.08455466230710347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,128,0,1,float16,float16,0,0.09216533104578654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,128,0,1,float16,fp8,0,0.09090133508046468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,4,128,0,1,fp8,fp8,0,0.08520000179608662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,128,0,1,float16,float16,0,0.09328533212343852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,128,0,1,float16,fp8,0,0.09153599540392558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,48,8,128,0,1,fp8,fp8,0,0.08503466844558716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,128,0,1,float16,fp8,0,0.05769066512584686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,128,0,1,float16,float16,0,0.05630933245023092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,48,128,0,1,fp8,fp8,0,0.054058666030565895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,128,0,1,float16,float16,0,0.05397333204746246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,128,0,1,float16,fp8,0,0.054229333996772766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,2,128,0,1,fp8,fp8,0,0.04994666576385498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,128,0,1,float16,float16,0,0.05375466744105021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,128,0,1,float16,fp8,0,0.05453333258628845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,4,128,0,1,fp8,fp8,0,0.05004266897837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,128,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,128,0,1,float16,fp8,0,0.055311997731526695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,48,8,128,0,1,fp8,fp8,0,0.051818668842315674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,128,0,1,float16,float16,0,0.03573333223660787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,128,0,1,float16,fp8,0,0.03623466690381368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,48,128,0,1,fp8,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,128,0,1,float16,float16,0,0.03530666728814443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,128,0,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,2,128,0,1,fp8,fp8,0,0.03363200028737386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,128,0,1,float16,float16,0,0.035749333600203194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,128,0,1,float16,fp8,0,0.03640000025431315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,4,128,0,1,fp8,fp8,0,0.03396799912055334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,128,0,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,128,0,1,float16,fp8,0,0.036229332288106285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,128,0,1,float16,float16,0,0.02390933285156886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,48,8,128,0,1,fp8,fp8,0,0.03417066733042399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,48,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,128,0,1,float16,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,2,128,0,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,128,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,4,128,0,1,fp8,fp8,0,0.023061332603295643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,128,0,1,float16,float16,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,128,0,1,float16,fp8,0,0.0236160010099411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,48,8,128,0,1,fp8,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,128,0,1,float16,float16,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,48,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,2,128,0,1,fp8,fp8,0,0.017984000345071156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,128,0,1,float16,float16,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,128,0,1,float16,fp8,0,0.020517333100239437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,4,128,0,1,fp8,fp8,0,0.01931200052301089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,128,0,1,float16,float16,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,128,0,1,float16,fp8,0,0.020479999482631683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,48,8,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,48,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,128,0,1,float16,float16,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,2,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,4,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,48,8,128,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,128,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,128,0,1,float16,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,48,128,0,1,fp8,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,2,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,128,0,1,float16,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,4,128,0,1,fp8,fp8,0,0.01613333324591319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,48,8,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,128,0,1,float16,float16,0,0.2632159988085429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,128,0,1,float16,fp8,0,0.26198933521906537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,2,128,0,1,fp8,fp8,0,0.2512693405151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,128,0,1,float16,float16,0,0.2637973427772522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,128,0,1,float16,fp8,0,0.2629280090332031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,4,128,0,1,fp8,fp8,0,0.2530933419863383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,128,0,1,float16,float16,0,0.2662293314933777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,128,0,1,float16,float16,0,0.1471573313077291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,128,0,1,float16,fp8,0,0.2651306589444478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,48,8,128,0,1,fp8,fp8,0,0.25727999210357666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,128,0,1,float16,fp8,0,0.14643200238545737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,48,128,0,1,fp8,fp8,0,0.1476533313592275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,128,0,1,float16,float16,0,0.1399893363316854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,128,0,1,float16,fp8,0,0.13806933164596558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,2,128,0,1,fp8,fp8,0,0.1300373375415802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,128,0,1,float16,float16,0,0.14014933506647745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,128,0,1,float16,fp8,0,0.13818132877349854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,4,128,0,1,fp8,fp8,0,0.13023466865221658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,128,0,1,float16,float16,0,0.1401653289794922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,128,0,1,float16,fp8,0,0.14018133282661438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,48,8,128,0,1,fp8,fp8,0,0.13218667109807333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,128,0,1,float16,float16,0,0.08006933331489563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,128,0,1,float16,fp8,0,0.0795360008875529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,48,128,0,1,fp8,fp8,0,0.077824001510938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,128,0,1,float16,float16,0,0.07696000238259633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,128,0,1,float16,fp8,0,0.07677866518497467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,2,128,0,1,fp8,fp8,0,0.07332799832026164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,128,0,1,float16,float16,0,0.07686399916807811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,128,0,1,float16,fp8,0,0.07689066727956136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,4,128,0,1,fp8,fp8,0,0.07261866827805837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,128,0,1,float16,float16,0,0.07705600063006084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,128,0,1,float16,fp8,0,0.07654933134714763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,48,8,128,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,128,0,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,128,0,1,float16,fp8,0,0.04762666424115499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,48,128,0,1,fp8,fp8,0,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,128,0,1,float16,float16,0,0.04786666731039683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,128,0,1,float16,fp8,0,0.047728002071380615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,2,128,0,1,fp8,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,128,0,1,float16,float16,0,0.04762666424115499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,128,0,1,float16,fp8,0,0.04809066653251648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,4,128,0,1,fp8,fp8,0,0.045653333266576133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,128,0,1,float16,float16,0,0.04663466910521189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,128,0,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,48,8,128,0,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,128,0,1,float16,fp8,0,0.03126399964094162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,48,128,0,1,fp8,fp8,0,0.031328000128269196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,128,0,1,float16,float16,0,0.03166933357715607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,128,0,1,float16,fp8,0,0.029680001238981884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,2,128,0,1,fp8,fp8,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,128,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,128,0,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,4,128,0,1,fp8,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,128,0,1,float16,float16,0,0.0317546675602595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,128,0,1,float16,fp8,0,0.030069333811601002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,48,8,128,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,128,0,1,float16,float16,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,128,0,1,float16,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,48,128,0,1,fp8,fp8,0,0.022986667851607006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,128,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,128,0,1,float16,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,2,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,128,0,1,float16,float16,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,128,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,4,128,0,1,fp8,fp8,0,0.021040000021457672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,48,8,128,0,1,fp8,fp8,0,0.02117866774400075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,128,0,1,float16,float16,0,0.018005333840847015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,128,0,1,float16,fp8,0,0.01915733392039935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,48,128,0,1,fp8,fp8,0,0.018661333868900936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,128,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,128,0,1,float16,float16,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,2,128,0,1,float16,float16,0,0.020080000162124634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,4,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,128,0,1,float16,float16,0,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,128,0,1,float16,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,48,8,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,128,0,1,float16,float16,0,0.015967999895413715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,48,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,2,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,128,0,1,float16,float16,0,0.01647466669480006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,4,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,48,8,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,128,0,1,float16,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,48,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,128,0,1,float16,float16,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,2,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,128,0,1,float16,float16,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,128,0,1,float16,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,4,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,48,8,128,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,128,0,1,float16,fp8,0,0.22060267130533853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,128,0,1,float16,float16,0,0.22233599424362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,2,128,0,1,fp8,fp8,0,0.20143999656041464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,128,0,1,float16,float16,0,0.22103466590245566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,128,0,1,fp8,fp8,0,0.20232532421747842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,4,128,0,1,float16,fp8,0,0.22191466887791952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,128,0,1,float16,float16,0,0.22225600481033325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,128,0,1,float16,fp8,0,0.22223466634750366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,48,8,128,0,1,fp8,fp8,0,0.20189867417017618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,0,0.11780266960461934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,0,0.11767466862996419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,48,128,0,1,fp8,fp8,0,0.1074666678905487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,128,0,1,float16,float16,0,0.11759466926256816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,128,0,1,float16,fp8,0,0.11749333143234253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,2,128,0,1,fp8,fp8,0,0.10756267110506694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,128,0,1,float16,float16,0,0.1179039975007375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,128,0,1,float16,fp8,0,0.11743467052777608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,4,128,0,1,fp8,fp8,0,0.10739200313886006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,128,0,1,float16,float16,0,0.11768533786137898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,128,0,1,float16,fp8,0,0.11773866415023804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,48,8,128,0,1,fp8,fp8,0,0.10754666725794475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,0,0.06635199983914693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,0,0.0664160003264745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,48,128,0,1,fp8,fp8,0,0.06238399942715963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,128,0,1,float16,float16,0,0.06771733363469441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,128,0,1,float16,fp8,0,0.06643733382225037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,2,128,0,1,fp8,fp8,0,0.06217599908510844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,128,0,1,float16,float16,0,0.06695466736952464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,128,0,1,float16,fp8,0,0.06670400003592174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,4,128,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,128,0,1,float16,float16,0,0.0666293352842331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,128,0,1,float16,fp8,0,0.06640000144640605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,48,8,128,0,1,fp8,fp8,0,0.061050668358802795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,0,0.040207999447981514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,0,0.042912001411120095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,48,128,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,128,0,1,float16,float16,0,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,128,0,1,float16,fp8,0,0.0415786678592364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,2,128,0,1,fp8,fp8,0,0.037589333951473236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,128,0,1,fp8,fp8,0,0.03763733307520548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,128,0,1,float16,float16,0,0.04181866844495138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,4,128,0,1,float16,fp8,0,0.042853335539499916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,128,0,1,float16,float16,0,0.0395413339138031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,128,0,1,fp8,fp8,0,0.03984000037113825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,48,8,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,0,0.028031999866167705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,48,128,0,1,fp8,fp8,0,0.026901334524154663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,128,0,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,128,0,1,float16,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,2,128,0,1,fp8,fp8,0,0.026880001028378803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,128,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,128,0,1,float16,fp8,0,0.029178666571776073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,4,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,128,0,1,float16,float16,0,0.028592000404993694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,128,0,1,float16,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,48,8,128,0,1,fp8,fp8,0,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,0,0.02269333352645238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,48,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,128,0,1,float16,float16,0,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,128,0,1,float16,fp8,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,2,128,0,1,fp8,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,128,0,1,float16,fp8,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,4,128,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,128,0,1,float16,float16,0,0.02204799900452296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,128,0,1,float16,fp8,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,48,8,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,0,0.018394666413466137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,48,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,128,0,1,float16,float16,0,0.018522666146357853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,2,128,0,1,fp8,fp8,0,0.018197332819302876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,128,0,1,float16,float16,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,4,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,128,0,1,float16,float16,0,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,128,0,1,float16,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,48,8,128,0,1,fp8,fp8,0,0.017952000101407368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,48,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,128,0,1,float16,float16,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,2,128,0,1,fp8,fp8,0,0.015706667055686314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,128,0,1,float16,fp8,0,0.01672533278663953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,128,0,1,float16,float16,0,0.01568000018596649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,48,8,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,48,128,0,1,fp8,fp8,0,0.015840000162522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,2,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,4,128,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,128,0,1,float16,float16,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,48,8,128,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,128,0,1,fp8,fp8,0,11.044677734375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,fp8,0,14.243061065673828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,2,128,0,1,float16,float16,0,14.725903828938803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,float16,0,14.484235127766928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,128,0,1,float16,fp8,0,15.179407755533854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,4,128,0,1,fp8,fp8,0,11.324586232503256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,float16,0,15.337103525797525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,float16,0,7.7346242268880205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,128,0,1,float16,fp8,0,15.446351369222006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,128,0,1,float16,fp8,0,7.843861262003581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,40,8,128,0,1,fp8,fp8,0,11.202719370524088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,40,128,0,1,fp8,fp8,0,5.863920211791992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,128,0,1,fp8,fp8,0,5.619781494140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,float16,0,7.439413070678711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,2,128,0,1,float16,fp8,0,7.38432502746582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,float16,0,7.35813840230306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,128,0,1,fp8,fp8,0,5.647082646687825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,4,128,0,1,float16,fp8,0,7.397274653116862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,float16,0,7.586437225341797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,float16,0,3.893690745035807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,128,0,1,float16,fp8,0,7.327829360961914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,40,8,128,0,1,fp8,fp8,0,5.634426752726237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,128,0,1,float16,fp8,0,3.896320025126139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,40,128,0,1,fp8,fp8,0,3.0620800654093423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,float16,0,3.6998828252156577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,128,0,1,fp8,fp8,0,2.944016138712565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,2,128,0,1,float16,fp8,0,3.6173121134440103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,float16,0,3.5606559117635093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,128,0,1,fp8,fp8,0,2.9431947072347007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,4,128,0,1,float16,fp8,0,3.781696001688639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,float16,0,3.7279841105143228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,float16,0,1.9944586753845215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,128,0,1,float16,fp8,0,3.7421706517537436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,40,8,128,0,1,fp8,fp8,0,2.949765205383301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,128,0,1,float16,fp8,0,2.03220272064209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,40,128,0,1,fp8,fp8,0,1.66921599706014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,float16,0,1.945583979288737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,128,0,1,fp8,fp8,0,1.6270400683085124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,2,128,0,1,float16,fp8,0,1.9174933433532715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,float16,0,1.9001439412434895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,128,0,1,fp8,fp8,0,1.6160267194112141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,4,128,0,1,float16,fp8,0,1.9366827011108398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,float16,0,1.9394292831420898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,128,0,1,float16,fp8,0,1.9140480359395344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,40,8,128,0,1,fp8,fp8,0,1.6286880175272624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,128,0,1,fp8,fp8,0,6.593749364217122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,float16,0,8.36962636311849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,2,128,0,1,float16,fp8,0,8.442096074422201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,float16,0,8.512357076009115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,128,0,1,fp8,fp8,0,6.650266647338867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,4,128,0,1,float16,fp8,0,8.487034479777018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,float16,0,8.875914891560873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,128,0,1,float16,fp8,0,8.56056022644043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,float16,0,4.714693387349446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,128,0,1,float16,fp8,0,4.688783963521321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,40,8,128,0,1,fp8,fp8,0,6.648591995239258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,40,128,0,1,fp8,fp8,0,3.641082763671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,float16,0,4.179210662841797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,128,0,1,fp8,fp8,0,3.3865652084350586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,2,128,0,1,float16,fp8,0,4.386506716410319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,float16,0,4.15939203898112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,128,0,1,fp8,fp8,0,3.3952693939208984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,4,128,0,1,float16,fp8,0,4.404719988505046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,float16,0,4.36847464243571
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,128,0,1,float16,fp8,0,4.379359881083171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,float16,0,2.350282669067383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,40,8,128,0,1,fp8,fp8,0,3.4230292638142905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,128,0,1,float16,fp8,0,2.361957391103109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,40,128,0,1,fp8,fp8,0,1.9047519365946453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,float16,0,2.1720959345499673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,128,0,1,float16,fp8,0,2.1544319788614907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,2,128,0,1,fp8,fp8,0,1.8041332562764485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,float16,0,2.1771626472473145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,128,0,1,fp8,fp8,0,1.8081067403157551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,4,128,0,1,float16,fp8,0,2.198906739552816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,float16,0,2.2034826278686523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,128,0,1,float16,fp8,0,2.215285301208496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,float16,0,1.2434826691945393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,40,8,128,0,1,fp8,fp8,0,1.820298671722412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,128,0,1,float16,fp8,0,1.2719679673512776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,40,128,0,1,fp8,fp8,0,1.109440008799235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,float16,0,1.1998133659362793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,128,0,1,fp8,fp8,0,1.0385599931081135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,2,128,0,1,float16,fp8,0,1.1909333070119221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,float16,0,1.1898826758066814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,128,0,1,float16,fp8,0,1.184127966562907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,4,128,0,1,fp8,fp8,0,1.0169546604156494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,float16,0,1.1944479942321777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,128,0,1,float16,fp8,0,1.205077330271403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,40,8,128,0,1,fp8,fp8,0,1.0463893413543701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,128,0,1,fp8,fp8,0,4.8112532297770185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,float16,0,6.11251703898112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,2,128,0,1,float16,fp8,0,5.998213450113933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,float16,0,5.970074971516927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,128,0,1,fp8,fp8,0,4.823429425557454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,4,128,0,1,float16,fp8,0,6.174064000447591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,float16,0,6.035167694091797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,float16,0,3.2435038884480796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,128,0,1,float16,fp8,0,3.296693483988444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,128,0,1,fp8,fp8,0,4.881264050801595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,40,8,128,0,1,float16,fp8,0,6.06935437520345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,40,128,0,1,fp8,fp8,0,2.6481653849283853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,float16,0,3.0294081370035806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,128,0,1,float16,fp8,0,3.038544019063314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,2,128,0,1,fp8,fp8,0,2.549503962198893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,float16,0,3.0723466873168945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,128,0,1,fp8,fp8,0,2.5096747080485025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,4,128,0,1,float16,fp8,0,3.0534400939941406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,float16,0,3.070890744527181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,float16,0,1.7245972951253254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,128,0,1,float16,fp8,0,1.7757813135782878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,128,0,1,fp8,fp8,0,2.5271093050638833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,40,8,128,0,1,float16,fp8,0,3.2067947387695312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,40,128,0,1,fp8,fp8,0,1.4147040049235027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,float16,0,1.6093173027038574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,128,0,1,float16,fp8,0,1.625088055928548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,2,128,0,1,fp8,fp8,0,1.3339625994364421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,float16,0,1.5968960126241047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,128,0,1,float16,fp8,0,1.617322603861491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,4,128,0,1,fp8,fp8,0,1.4371147155761719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,float16,0,1.5884639422098796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,float16,0,0.9457013607025146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,128,0,1,fp8,fp8,0,1.3523252805074055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,128,0,1,float16,fp8,0,0.9552640120188395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,40,8,128,0,1,float16,fp8,0,1.6181972821553547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,40,128,0,1,fp8,fp8,0,0.8049866358439127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,128,0,1,fp8,fp8,0,0.7688639958699545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,fp8,0,0.8831893603006998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,2,128,0,1,float16,float16,0,0.8880053361256918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,float16,0,0.892784039179484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,128,0,1,fp8,fp8,0,0.7670293649037679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,4,128,0,1,float16,fp8,0,0.8894186814626058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,float16,0,0.8977546691894531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,128,0,1,float16,fp8,0,0.9019680023193359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,40,8,128,0,1,fp8,fp8,0,0.7681706746419271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,128,0,1,fp8,fp8,0,6.539765040079753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,float16,0,8.127626419067383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,2,128,0,1,float16,fp8,0,8.177077611287435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,float16,0,8.031808217366537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,128,0,1,fp8,fp8,0,6.539498647054036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,4,128,0,1,float16,fp8,0,8.236096064249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,float16,0,8.212581634521484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,128,0,1,float16,fp8,0,8.407733281453451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,float16,0,4.479717254638672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,128,0,1,float16,fp8,0,4.46729056040446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,40,128,0,1,fp8,fp8,0,3.569141387939453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,40,8,128,0,1,fp8,fp8,0,6.64255968729655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,float16,0,4.0981705983479815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,128,0,1,fp8,fp8,0,3.297130584716797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,2,128,0,1,float16,fp8,0,4.111029307047526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,float16,0,4.081674575805664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,128,0,1,fp8,fp8,0,3.310175895690918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,4,128,0,1,float16,fp8,0,4.331839879353841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,float16,0,4.170165379842122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,128,0,1,float16,fp8,0,4.200869242350261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,float16,0,2.3147786458333335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,40,8,128,0,1,fp8,fp8,0,3.3372532526652017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,128,0,1,float16,fp8,0,2.3018506368001304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,40,128,0,1,fp8,fp8,0,1.8601172765096028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,float16,0,2.1171414057413735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,128,0,1,float16,fp8,0,2.058133284250895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,2,128,0,1,fp8,fp8,0,1.7668959299723308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,float16,0,2.0837225914001465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,128,0,1,float16,fp8,0,2.1086986859639487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,4,128,0,1,fp8,fp8,0,1.8498026529947917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,float16,0,2.092458724975586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,128,0,1,float16,fp8,0,2.127589384714762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,float16,0,1.1938986778259277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,40,8,128,0,1,fp8,fp8,0,1.746341387430827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,128,0,1,float16,fp8,0,1.23799467086792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,40,128,0,1,fp8,fp8,0,1.002784013748169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,float16,0,1.1066986719767253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,128,0,1,fp8,fp8,0,0.9390559991200765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,2,128,0,1,float16,fp8,0,1.1116267045338948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,float16,0,1.1052160263061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,128,0,1,float16,fp8,0,1.1111306349436443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,4,128,0,1,fp8,fp8,0,0.9431467056274414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,float16,0,1.1162292957305908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,128,0,1,float16,fp8,0,1.1196906566619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,40,8,128,0,1,fp8,fp8,0,0.9862186908721924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,float16,0,0.6774986584981283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,128,0,1,float16,fp8,0,0.6858346462249756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,40,128,0,1,fp8,fp8,0,0.5874720017115275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,float16,0,0.6184800068537394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,128,0,1,float16,fp8,0,0.6268266836802164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,2,128,0,1,fp8,fp8,0,0.5490026473999023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,float16,0,0.630730668703715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,128,0,1,float16,fp8,0,0.6289706627527872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,4,128,0,1,fp8,fp8,0,0.5511733293533325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,float16,0,0.6378986835479736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,128,0,1,float16,fp8,0,0.6417920192082723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,40,8,128,0,1,fp8,fp8,0,0.5526880025863647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,128,0,1,fp8,fp8,0,4.076058705647786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,float16,0,4.89575990041097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,2,128,0,1,float16,fp8,0,4.962202707926433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,128,0,1,fp8,fp8,0,4.081658681233724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,float16,0,4.991893450419108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,4,128,0,1,float16,fp8,0,4.9822133382161455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,float16,0,2.741648038228353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,float16,0,4.98472531636556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,128,0,1,fp8,fp8,0,4.155407905578613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,40,8,128,0,1,float16,fp8,0,5.016613324483235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,128,0,1,fp8,fp8,0,2.2913173039754233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,40,128,0,1,float16,fp8,0,2.795626640319824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,float16,0,2.499413331349691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,128,0,1,float16,fp8,0,2.5010666847229004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,2,128,0,1,fp8,fp8,0,2.074293295542399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,float16,0,2.493039925893148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,128,0,1,float16,fp8,0,2.527930736541748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,4,128,0,1,fp8,fp8,0,2.0956479708353677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,float16,0,2.4931999842325845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,float16,0,1.4344746271769206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,128,0,1,float16,fp8,0,2.565626621246338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,40,8,128,0,1,fp8,fp8,0,2.106719970703125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,128,0,1,float16,fp8,0,1.4777119954427083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,40,128,0,1,fp8,fp8,0,1.2040533224741619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,float16,0,1.309930642445882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,128,0,1,float16,fp8,0,1.2906400362650554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,2,128,0,1,fp8,fp8,0,1.092192014058431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,float16,0,1.2944586277008057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,128,0,1,fp8,fp8,0,1.103333314259847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,4,128,0,1,float16,fp8,0,1.3110346794128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,float16,0,1.299455960591634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,float16,0,0.7817653020222982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,128,0,1,float16,fp8,0,1.3220053513844807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,40,8,128,0,1,fp8,fp8,0,1.1126666863759358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,128,0,1,float16,fp8,0,0.7806293169657389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,40,128,0,1,fp8,fp8,0,0.661845326423645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,float16,0,0.713263988494873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,128,0,1,float16,fp8,0,0.7067946592966715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,2,128,0,1,fp8,fp8,0,0.6076373259226481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,float16,0,0.707472006479899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,128,0,1,float16,fp8,0,0.7112053235371908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,4,128,0,1,fp8,fp8,0,0.6088106632232666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,float16,0,0.7093599637349447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,float16,0,0.44944532712300617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,128,0,1,float16,fp8,0,0.7188693682352701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,40,8,128,0,1,fp8,fp8,0,0.6175839900970459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,128,0,1,float16,fp8,0,0.4549599885940552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,40,128,0,1,fp8,fp8,0,0.38998401165008545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,float16,0,0.4089173475901286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,128,0,1,float16,fp8,0,0.40622933705647785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,2,128,0,1,fp8,fp8,0,0.3651786645253499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,float16,0,0.411845326423645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,128,0,1,fp8,fp8,0,0.3673866589864095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,4,128,0,1,float16,fp8,0,0.411626656850179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,float16,0,0.4148533344268799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,128,0,1,float16,fp8,0,0.4200693368911743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,40,8,128,0,1,fp8,fp8,0,0.36982933680216473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,128,0,1,fp8,fp8,0,4.30189863840739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,float16,0,5.099050521850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,2,128,0,1,float16,fp8,0,5.168160120646159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,float16,0,5.1582291920979815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,128,0,1,fp8,fp8,0,4.309189478556315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,4,128,0,1,float16,fp8,0,5.213386535644531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,float16,0,5.194757461547852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,128,0,1,float16,fp8,0,5.304261207580566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,float16,0,2.912752151489258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,128,0,1,fp8,fp8,0,2.4497706095377603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,40,128,0,1,float16,fp8,0,2.9197492599487305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,40,8,128,0,1,fp8,fp8,0,4.366170565287272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,float16,0,2.539386590321859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,128,0,1,fp8,fp8,0,2.145616054534912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,2,128,0,1,float16,fp8,0,2.5403839747111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,float16,0,2.554661273956299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,128,0,1,fp8,fp8,0,2.1588692665100098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,4,128,0,1,float16,fp8,0,2.565413316090902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,float16,0,2.600053310394287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,128,0,1,float16,fp8,0,2.608853340148926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,float16,0,1.4836799303690593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,40,8,128,0,1,fp8,fp8,0,2.1913493474324546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,128,0,1,float16,fp8,0,1.5198240280151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,40,128,0,1,fp8,fp8,0,1.2974186738332112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,float16,0,1.2990933259328206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,128,0,1,fp8,fp8,0,1.1233546733856201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,2,128,0,1,float16,fp8,0,1.295408010482788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,float16,0,1.3130133152008057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,128,0,1,float16,fp8,0,1.3156800270080566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,4,128,0,1,fp8,fp8,0,1.124176025390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,float16,0,1.3268906275431316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,128,0,1,float16,fp8,0,1.3415093421936035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,float16,0,0.7875359853108724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,40,8,128,0,1,fp8,fp8,0,1.1383946736653645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,128,0,1,float16,fp8,0,0.793077309926351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,40,128,0,1,fp8,fp8,0,0.6751680374145508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,float16,0,0.6949280103047689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,128,0,1,float16,fp8,0,0.6944426695505778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,2,128,0,1,fp8,fp8,0,0.6001760164896647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,float16,0,0.6982080141703287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,128,0,1,fp8,fp8,0,0.6037973165512085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,4,128,0,1,float16,fp8,0,0.7024160226186117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,float16,0,0.7046240170796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,128,0,1,float16,fp8,0,0.7145386536916097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,float16,0,0.43862934907277423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,40,8,128,0,1,fp8,fp8,0,0.6135093371073405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,128,0,1,fp8,fp8,0,0.38211198647816974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,40,128,0,1,float16,fp8,0,0.4457919994990031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,float16,0,0.38636799653371173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,128,0,1,float16,fp8,0,0.3873973290125529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,2,128,0,1,fp8,fp8,0,0.3439893325169881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,float16,0,0.391429344813029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,128,0,1,fp8,fp8,0,0.34540800253550213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,4,128,0,1,float16,fp8,0,0.3936213254928589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,float16,0,0.39749332269032794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,float16,0,0.26317866643269855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,128,0,1,fp8,fp8,0,0.3500053485234578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,40,8,128,0,1,float16,fp8,0,0.4005066553751628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,128,0,1,float16,fp8,0,0.26715733607610065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,40,128,0,1,fp8,fp8,0,0.2323946754137675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,float16,0,0.2300106684366862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,128,0,1,fp8,fp8,0,0.20983999967575073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,2,128,0,1,float16,fp8,0,0.2318133314450582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,float16,0,0.23079466819763184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,128,0,1,float16,fp8,0,0.23577600717544556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,4,128,0,1,fp8,fp8,0,0.21267199516296387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,float16,0,0.23449599742889404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,128,0,1,fp8,fp8,0,0.21594667434692383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,40,8,128,0,1,float16,fp8,0,0.2365866700808207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,128,0,1,fp8,fp8,0,2.7860425313313804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,float16,0,3.2744693756103516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,2,128,0,1,float16,fp8,0,3.254938761393229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,128,0,1,fp8,fp8,0,2.801168123881022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,float16,0,3.2470881144205728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,4,128,0,1,float16,fp8,0,3.285194714864095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,float16,0,3.3142080307006836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,float16,0,1.8838133811950684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,128,0,1,float16,fp8,0,1.903861363728841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,128,0,1,fp8,fp8,0,2.8439305623372397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,40,8,128,0,1,float16,fp8,0,3.3477067947387695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,40,128,0,1,fp8,fp8,0,1.6150026321411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,float16,0,1.6396959622701008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,128,0,1,float16,fp8,0,1.6318507194519043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,2,128,0,1,fp8,fp8,0,1.4081066449483235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,float16,0,1.6568800608317058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,128,0,1,float16,fp8,0,1.6628586451212566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,4,128,0,1,fp8,fp8,0,1.416986624399821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,float16,0,1.681440035502116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,float16,0,0.9703893661499023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,128,0,1,float16,fp8,0,1.695269266764323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,128,0,1,float16,fp8,0,0.9918399651845297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,40,8,128,0,1,fp8,fp8,0,1.4394933382670085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,40,128,0,1,fp8,fp8,0,0.8416000207265218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,float16,0,0.8560693264007568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,128,0,1,fp8,fp8,0,0.7390399773915609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,2,128,0,1,float16,fp8,0,0.8592267036437988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,float16,0,0.8581439654032389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,128,0,1,fp8,fp8,0,0.7454293568929037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,4,128,0,1,float16,fp8,0,0.8701226711273193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,float16,0,0.8760906855265299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,float16,0,0.5249760150909424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,128,0,1,float16,fp8,0,0.5339626471201578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,128,0,1,fp8,fp8,0,0.7547039985656738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,40,8,128,0,1,float16,fp8,0,0.8768373330434164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,40,128,0,1,fp8,fp8,0,0.4557120005289714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,float16,0,0.46092267831166583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,128,0,1,float16,fp8,0,0.4619786739349365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,2,128,0,1,fp8,fp8,0,0.40350933869679767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,float16,0,0.46695999304453534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,128,0,1,fp8,fp8,0,0.4076586564381917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,4,128,0,1,float16,fp8,0,0.4665333429972331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,float16,0,0.4717973470687866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,128,0,1,float16,fp8,0,0.47624532381693524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,float16,0,0.29969600836435956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,128,0,1,float16,fp8,0,0.30316799879074097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,40,8,128,0,1,fp8,fp8,0,0.4123893181482951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,float16,0,0.2600799997647603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,40,128,0,1,fp8,fp8,0,0.2633066574732463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,128,0,1,float16,fp8,0,0.2579306761423747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,2,128,0,1,fp8,fp8,0,0.23667732874552408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,128,0,1,fp8,fp8,0,0.23842666546503702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,float16,0,0.26123199860254925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,4,128,0,1,float16,fp8,0,0.26366933186848956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,float16,0,0.26944533983866376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,float16,0,0.18731733163197836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,128,0,1,float16,fp8,0,0.1868000030517578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,128,0,1,fp8,fp8,0,0.24054400126139322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,40,8,128,0,1,float16,fp8,0,0.268885334332784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,40,128,0,1,fp8,fp8,0,0.16474666198094687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,128,0,1,fp8,fp8,0,0.14646400014559427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,float16,0,0.16037333011627197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,2,128,0,1,float16,fp8,0,0.16248533129692078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,float16,0,0.1628373364607493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,128,0,1,float16,fp8,0,0.16221333543459573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,4,128,0,1,fp8,fp8,0,0.14828800161679587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,float16,0,0.16288533806800842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,128,0,1,float16,fp8,0,0.16431466738382974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,40,8,128,0,1,fp8,fp8,0,0.15001066525777182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,128,0,1,fp8,fp8,0,3.15286922454834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,float16,0,3.682623863220215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,2,128,0,1,float16,fp8,0,3.6726506551106772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,float16,0,3.6597652435302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,128,0,1,fp8,fp8,0,3.1612265904744468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,4,128,0,1,float16,fp8,0,3.702554702758789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,float16,0,3.736933390299479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,128,0,1,float16,fp8,0,3.7462987899780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,float16,0,2.102117379506429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,128,0,1,float16,fp8,0,2.1224053700764975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,40,8,128,0,1,fp8,fp8,0,3.2151947021484375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,40,128,0,1,fp8,fp8,0,1.8274134000142415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,float16,0,1.8041973114013672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,128,0,1,float16,fp8,0,1.8194986979166667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,2,128,0,1,fp8,fp8,0,1.5687626202901204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,float16,0,1.817578633626302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,128,0,1,float16,fp8,0,1.8369119962056477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,4,128,0,1,fp8,fp8,0,1.580026626586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,float16,0,1.8602399826049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,128,0,1,fp8,fp8,0,1.6105866432189941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,40,8,128,0,1,float16,fp8,0,1.8786400159200032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,float16,0,1.0752800305684407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,128,0,1,float16,fp8,0,1.0894827047983806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,40,128,0,1,fp8,fp8,0,0.9367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,float16,0,0.9293279647827148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,128,0,1,float16,fp8,0,0.933141311009725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,2,128,0,1,fp8,fp8,0,0.8063573042551676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,float16,0,0.937824010848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,128,0,1,float16,fp8,0,0.9437173207600912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,4,128,0,1,fp8,fp8,0,0.8133440017700195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,float16,0,0.9571946461995443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,float16,0,0.5645333528518677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,128,0,1,float16,fp8,0,0.965445359547933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,40,8,128,0,1,fp8,fp8,0,0.8283572991689047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,128,0,1,float16,fp8,0,0.5750666856765747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,40,128,0,1,fp8,fp8,0,0.4986613194147746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,float16,0,0.4927146832148234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,128,0,1,float16,fp8,0,0.49167998631795246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,2,128,0,1,fp8,fp8,0,0.4290826718012492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,float16,0,0.4952319860458374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,128,0,1,float16,fp8,0,0.499839981396993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,4,128,0,1,fp8,fp8,0,0.4352693160374959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,float16,0,0.5048160155614217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,128,0,1,float16,fp8,0,0.5076800187428793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,float16,0,0.31147199869155884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,40,8,128,0,1,fp8,fp8,0,0.44193601608276367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,128,0,1,float16,fp8,0,0.31696534156799316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,40,128,0,1,fp8,fp8,0,0.27436800797780353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,float16,0,0.2700586716334025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,128,0,1,float16,fp8,0,0.2691626747449239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,2,128,0,1,fp8,fp8,0,0.24253867069880167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,float16,0,0.2712480028470357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,128,0,1,float16,fp8,0,0.2736000021298726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,4,128,0,1,fp8,fp8,0,0.24261866013209024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,float16,0,0.2778880000114441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,128,0,1,float16,fp8,0,0.28060799837112427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,float16,0,0.1832159956296285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,40,8,128,0,1,fp8,fp8,0,0.24707732597986856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,128,0,1,float16,fp8,0,0.18691200017929077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,40,128,0,1,fp8,fp8,0,0.1634773313999176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,float16,0,0.15345600247383118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,128,0,1,float16,fp8,0,0.15381866693496704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,2,128,0,1,fp8,fp8,0,0.1413279970486959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,fp8,0,0.1565546691417694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,128,0,1,float16,float16,0,0.15460800131162009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,4,128,0,1,fp8,fp8,0,0.14355199535687765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,float16,0,0.15787733594576517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,128,0,1,float16,fp8,0,0.1602826714515686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,40,8,128,0,1,fp8,fp8,0,0.14729600151379904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,float16,0,0.11156800389289856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,128,0,1,float16,fp8,0,0.11178666353225708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,40,128,0,1,fp8,fp8,0,0.10619200269381206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,float16,0,0.10280533631642659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,128,0,1,float16,fp8,0,0.10398933291435242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,2,128,0,1,fp8,fp8,0,0.09506133198738098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,fp8,0,0.10314666231473286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,128,0,1,float16,float16,0,0.10459733009338379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,4,128,0,1,fp8,fp8,0,0.09538132945696513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,float16,0,0.10335999727249146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,128,0,1,float16,fp8,0,0.10381866494814555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,40,8,128,0,1,fp8,fp8,0,0.0963200032711029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,128,0,1,fp8,fp8,0,2.1397013664245605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,float16,0,2.4757919311523438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,2,128,0,1,float16,fp8,0,2.475829283396403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,float16,0,2.5009973843892417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,128,0,1,fp8,fp8,0,2.1582560539245605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,4,128,0,1,float16,fp8,0,2.515658696492513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,float16,0,2.5531999270121255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,128,0,1,float16,fp8,0,2.5745066006978354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,float16,0,1.4443786938985188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,128,0,1,float16,fp8,0,1.462469259897868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,40,128,0,1,fp8,fp8,0,1.2657492955525715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,40,8,128,0,1,fp8,fp8,0,2.203824043273926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,float16,0,1.2413973013559978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,128,0,1,fp8,fp8,0,1.0757919947306316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,2,128,0,1,float16,fp8,0,1.2416000366210938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,float16,0,1.2544373671213787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,128,0,1,fp8,fp8,0,1.0901866753896077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,4,128,0,1,float16,fp8,0,1.2648159662882488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,float16,0,1.2839893500010173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,128,0,1,float16,fp8,0,1.2951040267944336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,float16,0,0.7436906496683756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,128,0,1,float16,fp8,0,0.755120038986206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,40,8,128,0,1,fp8,fp8,0,1.1120426654815674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,40,128,0,1,fp8,fp8,0,0.6551520029703776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,float16,0,0.6415146589279175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,128,0,1,fp8,fp8,0,0.5599146684010824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,2,128,0,1,float16,fp8,0,0.642901341120402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,float16,0,0.6525439818700155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,128,0,1,fp8,fp8,0,0.5679359833399454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,4,128,0,1,float16,fp8,0,0.6537813345591227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,float16,0,0.6644906600316366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,128,0,1,fp8,fp8,0,0.5764533281326294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,40,8,128,0,1,float16,fp8,0,0.6697440147399902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,float16,0,0.3949973185857137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,128,0,1,fp8,fp8,0,0.3511679967244466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,40,128,0,1,float16,fp8,0,0.40427732467651367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,float16,0,0.3428639968236287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,128,0,1,fp8,fp8,0,0.3020799954732259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,2,128,0,1,float16,fp8,0,0.3421386480331421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,float16,0,0.3468480110168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,128,0,1,fp8,fp8,0,0.304367999235789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,4,128,0,1,float16,fp8,0,0.34931198755900067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,float16,0,0.3551199833552043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,128,0,1,fp8,fp8,0,0.3104533354441325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,40,8,128,0,1,float16,fp8,0,0.357477347056071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,float16,0,0.22061866521835327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,128,0,1,float16,fp8,0,0.22535999615987143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,40,128,0,1,fp8,fp8,0,0.1974666714668274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,float16,0,0.18811200062433878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,128,0,1,fp8,fp8,0,0.1718133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,2,128,0,1,float16,fp8,0,0.18847467501958212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,float16,0,0.1913813352584839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,128,0,1,float16,fp8,0,0.1926506757736206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,4,128,0,1,fp8,fp8,0,0.17333867152531943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,float16,0,0.196560005346934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,128,0,1,fp8,fp8,0,0.17717333634694418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,40,8,128,0,1,float16,fp8,0,0.19628800948460898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,float16,0,0.13168533643086752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,128,0,1,float16,fp8,0,0.1330773333708445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,40,128,0,1,fp8,fp8,0,0.11900800466537476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,float16,0,0.11074666182200114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,128,0,1,float16,fp8,0,0.11136533816655476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,2,128,0,1,fp8,fp8,0,0.10047466556231181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,float16,0,0.11068800091743469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,128,0,1,float16,fp8,0,0.11202667156855266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,4,128,0,1,fp8,fp8,0,0.1011199951171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,float16,0,0.11317333579063416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,128,0,1,float16,fp8,0,0.11335466305414836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,40,8,128,0,1,fp8,fp8,0,0.10313066840171814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,float16,0,0.0827466646830241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,128,0,1,float16,fp8,0,0.08348799745241801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,40,128,0,1,fp8,fp8,0,0.07830933233102162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,float16,0,0.07853866616884868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,128,0,1,float16,fp8,0,0.07859733204046886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,2,128,0,1,fp8,fp8,0,0.07250666618347168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,fp8,0,0.0786240001519521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,128,0,1,fp8,fp8,0,0.07270933190981548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,float16,0,0.0783786674340566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,4,128,0,1,float16,float16,0,0.07899199922879536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,128,0,1,float16,fp8,0,0.08056533336639404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,40,8,128,0,1,fp8,fp8,0,0.07390933235486348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,float16,0,2.6672960917154946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,128,0,1,fp8,fp8,0,2.4700800577799478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,2,128,0,1,float16,fp8,0,2.665114720662435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,float16,0,2.6993494033813477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,128,0,1,fp8,fp8,0,2.6566239992777505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,4,128,0,1,float16,fp8,0,2.6954399744669595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,fp8,0,2.902794520060221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,128,0,1,float16,float16,0,2.9092372258504233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,float16,0,1.6005973815917969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,128,0,1,fp8,fp8,0,1.4878719647725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,40,128,0,1,float16,fp8,0,1.568127950032552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,40,8,128,0,1,fp8,fp8,0,2.678629239400228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,float16,0,1.3470826148986816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,128,0,1,fp8,fp8,0,1.2389333248138428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,2,128,0,1,float16,fp8,0,1.3460586865743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,float16,0,1.3603626887003581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,128,0,1,float16,fp8,0,1.3613759676615398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,4,128,0,1,fp8,fp8,0,1.248538653055827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,float16,0,1.4026400248209636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,128,0,1,float16,fp8,0,1.3915093739827473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,float16,0,0.816378672917684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,40,8,128,0,1,fp8,fp8,0,1.3420213063557942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,128,0,1,float16,fp8,0,0.7978453636169434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,40,128,0,1,fp8,fp8,0,0.7551679611206055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,float16,0,0.6792799631754557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,128,0,1,float16,fp8,0,0.6797920068105062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,2,128,0,1,fp8,fp8,0,0.629530668258667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,fp8,0,0.6908693313598633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,128,0,1,float16,float16,0,0.6909173329671224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,4,128,0,1,fp8,fp8,0,0.634933352470398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,float16,0,0.7096959749857584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,float16,0,0.4242719809214274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,128,0,1,fp8,fp8,0,0.6485333442687988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,40,8,128,0,1,float16,fp8,0,0.7081813017527262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,128,0,1,float16,fp8,0,0.4158080021540324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,40,128,0,1,fp8,fp8,0,0.39106134573618573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,float16,0,0.35148266951243085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,128,0,1,float16,fp8,0,0.35288532574971515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,2,128,0,1,fp8,fp8,0,0.3163733283678691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,float16,0,0.3572640021642049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,128,0,1,fp8,fp8,0,0.3271733323733012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,4,128,0,1,float16,fp8,0,0.3593493302663167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,float16,0,0.36587735017140705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,float16,0,0.2263466715812683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,128,0,1,fp8,fp8,0,0.334112008412679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,40,8,128,0,1,float16,fp8,0,0.36501868565877277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,128,0,1,float16,fp8,0,0.22049599885940552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,float16,0,0.18810667594273886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,40,128,0,1,fp8,fp8,0,0.20764267444610596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,128,0,1,float16,fp8,0,0.187391996383667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,2,128,0,1,fp8,fp8,0,0.1707680026690165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,float16,0,0.19126399358113608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,128,0,1,float16,fp8,0,0.1921173334121704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,4,128,0,1,fp8,fp8,0,0.17691200971603394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,float16,0,0.19679999351501465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,float16,0,0.12812800208727518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,128,0,1,fp8,fp8,0,0.17940266927083334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,40,8,128,0,1,float16,fp8,0,0.19641067584355673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,128,0,1,float16,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,40,128,0,1,fp8,fp8,0,0.11774933338165283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,float16,0,0.10267733534177144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,128,0,1,float16,fp8,0,0.10322133700052898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,float16,0,0.10345600048700969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,2,128,0,1,fp8,fp8,0,0.09170666337013245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,128,0,1,fp8,fp8,0,0.09499200185139973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,float16,0,0.10527466734250386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,4,128,0,1,float16,fp8,0,0.10354133447011311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,128,0,1,float16,fp8,0,0.10686933000882466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,float16,0,0.06822933256626129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,40,8,128,0,1,fp8,fp8,0,0.09957333405812581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,128,0,1,float16,fp8,0,0.06881066660086314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,float16,0,0.062218666076660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,40,128,0,1,fp8,fp8,0,0.0684853345155716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,128,0,1,fp8,fp8,0,0.0544053316116333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,2,128,0,1,float16,fp8,0,0.062319998939832054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,float16,0,0.06258133550484975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,128,0,1,float16,fp8,0,0.06235733131567637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,float16,0,0.06362666686375935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,4,128,0,1,fp8,fp8,0,0.05398933092753092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,128,0,1,float16,fp8,0,0.06363733112812042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,float16,0,0.045184001326560974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,40,8,128,0,1,fp8,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,128,0,1,float16,fp8,0,0.04371733466784159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,40,128,0,1,fp8,fp8,0,0.04156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,float16,0,0.043605332573254905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,128,0,1,float16,fp8,0,0.04391466577847799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,2,128,0,1,fp8,fp8,0,0.03961066653331121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,float16,0,0.04390400151411692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,128,0,1,float16,fp8,0,0.044639999667803444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,4,128,0,1,fp8,fp8,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,float16,0,0.044533332188924156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,128,0,1,float16,fp8,0,0.04438399771849314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,40,8,128,0,1,fp8,fp8,0,0.03982399900754293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,128,0,1,float16,float16,0,2.330992062886556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,128,0,1,fp8,fp8,0,2.1759947141011557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,2,128,0,1,float16,fp8,0,2.3278026580810547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,128,0,1,float16,float16,0,2.3617919286092124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,128,0,1,float16,fp8,0,2.375413258870443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,4,128,0,1,fp8,fp8,0,2.362112045288086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,128,0,1,float16,float16,0,2.558880011240641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,128,0,1,float16,float16,0,1.4081652959187825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,128,0,1,float16,fp8,0,1.377471923828125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,128,0,1,fp8,fp8,0,2.3854986826578775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,40,8,128,0,1,float16,fp8,0,2.5462986628214517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,40,128,0,1,fp8,fp8,0,1.344549338022868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,128,0,1,float16,float16,0,1.1739359696706135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,128,0,1,float16,fp8,0,1.1751519838968914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,2,128,0,1,fp8,fp8,0,1.0865226586659749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,128,0,1,float16,float16,0,1.1871786912282307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,128,0,1,float16,fp8,0,1.1873973210652669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,4,128,0,1,fp8,fp8,0,1.113327980041504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,128,0,1,float16,float16,0,0.7184906800587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,128,0,1,float16,float16,0,1.2325173219045003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,128,0,1,float16,fp8,0,1.2106719811757405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,40,8,128,0,1,fp8,fp8,0,1.1972373326619465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,128,0,1,float16,float16,0,0.5935306549072266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,128,0,1,float16,fp8,0,0.7004266579945883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,40,128,0,1,fp8,fp8,0,0.6772692998250326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,128,0,1,float16,fp8,0,0.59388267993927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,2,128,0,1,fp8,fp8,0,0.5510720014572144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,128,0,1,float16,float16,0,0.6034559806187948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,128,0,1,float16,fp8,0,0.6034293174743652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,4,128,0,1,fp8,fp8,0,0.5566453138987223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,128,0,1,float16,float16,0,0.3750986655553182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,128,0,1,fp8,fp8,0,0.5710560083389282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,128,0,1,float16,float16,0,0.6169120073318481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,40,8,128,0,1,float16,fp8,0,0.6157653331756592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,128,0,1,float16,fp8,0,0.3644266525904338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,128,0,1,float16,float16,0,0.3086293339729309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,128,0,1,float16,fp8,0,0.30850134293238324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,40,128,0,1,fp8,fp8,0,0.3510826826095581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,2,128,0,1,fp8,fp8,0,0.2781546711921692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,128,0,1,float16,float16,0,0.31382934252421063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,128,0,1,float16,fp8,0,0.313920001188914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,4,128,0,1,fp8,fp8,0,0.2873493234316508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,128,0,1,float16,float16,0,0.31913065910339355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,128,0,1,float16,float16,0,0.19980265696843466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,128,0,1,float16,fp8,0,0.318725327650706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,40,8,128,0,1,fp8,fp8,0,0.29334400097529095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,128,0,1,float16,fp8,0,0.19381332397460938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,40,128,0,1,fp8,fp8,0,0.1867146690686544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,128,0,1,float16,float16,0,0.16405333081881204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,128,0,1,float16,fp8,0,0.16523200273513794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,2,128,0,1,fp8,fp8,0,0.1483519971370697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,128,0,1,float16,float16,0,0.1674506664276123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,128,0,1,float16,fp8,0,0.16726400454839072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,4,128,0,1,fp8,fp8,0,0.15504533052444458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,128,0,1,float16,float16,0,0.1713013251622518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,128,0,1,float16,fp8,0,0.17032533884048462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,40,8,128,0,1,fp8,fp8,0,0.15821866194407144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,128,0,1,float16,float16,0,0.11080533266067505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,128,0,1,float16,fp8,0,0.10846400260925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,40,128,0,1,fp8,fp8,0,0.10483200351397197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,128,0,1,float16,float16,0,0.09103467067082723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,128,0,1,float16,fp8,0,0.0906826655069987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,128,0,1,float16,float16,0,0.09121599793434143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,2,128,0,1,fp8,fp8,0,0.07940799991289775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,128,0,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,4,128,0,1,float16,fp8,0,0.0909440020720164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,128,0,1,float16,float16,0,0.09366400043169658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,128,0,1,fp8,fp8,0,0.088319996992747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,128,0,1,float16,float16,0,0.061205332477887474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,40,8,128,0,1,float16,fp8,0,0.09296533465385437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,128,0,1,float16,fp8,0,0.06049066781997681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,40,128,0,1,fp8,fp8,0,0.06021333237489065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,128,0,1,float16,float16,0,0.05584000051021576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,128,0,1,float16,fp8,0,0.05509333312511444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,2,128,0,1,fp8,fp8,0,0.04833599925041199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,128,0,1,float16,fp8,0,0.056202664971351624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,128,0,1,float16,float16,0,0.056159997979799904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,128,0,1,float16,float16,0,0.05607999861240387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,4,128,0,1,fp8,fp8,0,0.0480320006608963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,128,0,1,float16,fp8,0,0.056128000219662987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,40,8,128,0,1,fp8,fp8,0,0.04837333162625631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,128,0,1,float16,float16,0,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,128,0,1,float16,fp8,0,0.04162666698296865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,40,128,0,1,fp8,fp8,0,0.03819733361403147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,128,0,1,float16,float16,0,0.03803733239571253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,128,0,1,float16,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,2,128,0,1,fp8,fp8,0,0.033733333150545754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,128,0,1,float16,float16,0,0.0394400010506312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,128,0,1,float16,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,4,128,0,1,fp8,fp8,0,0.03403733422358831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,128,0,1,float16,float16,0,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,128,0,1,float16,fp8,0,0.03976000100374222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,40,8,128,0,1,fp8,fp8,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,128,0,1,float16,float16,0,0.029781334102153778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,128,0,1,float16,fp8,0,0.027701333165168762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,128,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,40,128,0,1,fp8,fp8,0,0.02548266698916753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,128,0,1,float16,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,2,128,0,1,fp8,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,128,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,128,0,1,fp8,fp8,0,0.025941332181294758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,4,128,0,1,float16,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,128,0,1,float16,float16,0,0.028223998844623566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,128,0,1,float16,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,40,8,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,128,0,1,float16,float16,0,1.0765493710835774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,128,0,1,float16,fp8,0,1.0731039841969807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,2,128,0,1,fp8,fp8,0,1.0023039976755779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,128,0,1,float16,fp8,0,1.0866933663686116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,128,0,1,fp8,fp8,0,1.0180106957753499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,4,128,0,1,float16,float16,0,1.084485371907552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,128,0,1,float16,float16,0,1.1067519982655842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,128,0,1,float16,float16,0,0.656544009844462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,128,0,1,float16,fp8,0,0.6378293434778849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,128,0,1,float16,fp8,0,1.108512004216512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,40,8,128,0,1,fp8,fp8,0,1.116042693456014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,128,0,1,float16,float16,0,0.5400586525599161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,40,128,0,1,fp8,fp8,0,0.6318826675415039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,128,0,1,float16,fp8,0,0.5393759806950887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,2,128,0,1,fp8,fp8,0,0.5105439821879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,128,0,1,float16,fp8,0,0.5465333461761475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,128,0,1,float16,float16,0,0.5488906701405843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,4,128,0,1,fp8,fp8,0,0.5140586694081625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,128,0,1,float16,float16,0,0.559994657834371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,128,0,1,float16,float16,0,0.33930134773254395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,128,0,1,float16,fp8,0,0.5566293398539225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,128,0,1,float16,fp8,0,0.33059199651082355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,40,8,128,0,1,fp8,fp8,0,0.5240106582641602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,128,0,1,float16,float16,0,0.27823466062545776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,40,128,0,1,fp8,fp8,0,0.3280160029729207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,128,0,1,fp8,fp8,0,0.2520479957262675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,2,128,0,1,float16,fp8,0,0.2789333264033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,128,0,1,float16,float16,0,0.2837066650390625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,128,0,1,float16,fp8,0,0.2831146717071533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,4,128,0,1,fp8,fp8,0,0.2632586757342021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,128,0,1,float16,float16,0,0.2888373335202535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,128,0,1,float16,float16,0,0.18178133169809976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,128,0,1,float16,fp8,0,0.2874240080515544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,128,0,1,float16,fp8,0,0.1775839924812317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,40,8,128,0,1,fp8,fp8,0,0.2690346638361613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,128,0,1,float16,fp8,0,0.15271466970443726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,128,0,1,float16,float16,0,0.15178666512171426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,2,128,0,1,fp8,fp8,0,0.13623467087745667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,40,128,0,1,fp8,fp8,0,0.17484267552693686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,128,0,1,float16,float16,0,0.15466666221618652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,128,0,1,float16,fp8,0,0.15435733397801718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,4,128,0,1,fp8,fp8,0,0.14317333698272705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,128,0,1,float16,float16,0,0.15613866845766702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,128,0,1,float16,fp8,0,0.15568533539772034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,128,0,1,float16,float16,0,0.10363733768463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,128,0,1,float16,fp8,0,0.1016319990158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,40,8,128,0,1,fp8,fp8,0,0.14520532886187235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,40,128,0,1,fp8,fp8,0,0.09954667091369629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,128,0,1,float16,float16,0,0.08475733796755473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,128,0,1,float16,fp8,0,0.0844533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,2,128,0,1,fp8,fp8,0,0.07454933226108551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,128,0,1,float16,fp8,0,0.0851093331972758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,128,0,1,float16,float16,0,0.08518399794896443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,4,128,0,1,fp8,fp8,0,0.07749866445859273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,128,0,1,fp8,fp8,0,0.08201066652933757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,128,0,1,float16,float16,0,0.08738133311271667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,40,8,128,0,1,float16,fp8,0,0.08718400200208028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,128,0,1,float16,float16,0,0.058543999989827476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,128,0,1,float16,fp8,0,0.05816000203291575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,40,128,0,1,fp8,fp8,0,0.060133333007494606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,128,0,1,float16,float16,0,0.05195199946562449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,128,0,1,float16,fp8,0,0.052746668457984924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,2,128,0,1,fp8,fp8,0,0.0458133320013682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,128,0,1,float16,float16,0,0.05212800204753876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,128,0,1,float16,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,4,128,0,1,fp8,fp8,0,0.045882667104403176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,128,0,1,float16,float16,0,0.05409599840641022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,128,0,1,float16,fp8,0,0.054272000988324486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,128,0,1,float16,float16,0,0.03766933331886927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,40,8,128,0,1,fp8,fp8,0,0.04623466730117798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,128,0,1,fp8,fp8,0,0.034448000291983284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,40,128,0,1,float16,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,128,0,1,float16,float16,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,128,0,1,float16,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,2,128,0,1,fp8,fp8,0,0.03166399896144867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,128,0,1,float16,float16,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,128,0,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,4,128,0,1,fp8,fp8,0,0.03340800106525421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,128,0,1,float16,float16,0,0.037818667789300285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,128,0,1,float16,fp8,0,0.03630933413902918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,40,8,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,128,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,128,0,1,float16,fp8,0,0.026591998835404713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,40,128,0,1,fp8,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,128,0,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,128,0,1,float16,float16,0,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,2,128,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,128,0,1,float16,fp8,0,0.025749333202838898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,4,128,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,128,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,40,8,128,0,1,fp8,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,128,0,1,float16,float16,0,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,128,0,1,float16,fp8,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,40,128,0,1,fp8,fp8,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,128,0,1,float16,float16,0,0.023578666150569916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,128,0,1,float16,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,2,128,0,1,fp8,fp8,0,0.02085866779088974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,128,0,1,float16,float16,0,0.023599999646345775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,128,0,1,fp8,fp8,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,4,128,0,1,float16,fp8,0,0.02334933231274287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,128,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,128,0,1,float16,float16,0,0.023530667026837666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,40,8,128,0,1,fp8,fp8,0,0.021312000850836437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,128,0,1,float16,float16,0,0.5844533443450928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,128,0,1,float16,fp8,0,0.5851733287175497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,2,128,0,1,fp8,fp8,0,0.5574026505152384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,128,0,1,fp8,fp8,0,0.5602986812591553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,128,0,1,float16,float16,0,0.5936799844106039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,4,128,0,1,float16,fp8,0,0.5935466686884562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,128,0,1,float16,fp8,0,0.6038293441136678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,128,0,1,fp8,fp8,0,0.574351986249288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,40,8,128,0,1,float16,float16,0,0.6059573491414388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,128,0,1,float16,float16,0,0.3614453474680583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,128,0,1,float16,fp8,0,0.3495519955952962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,128,0,1,float16,float16,0,0.30140799283981323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,40,128,0,1,fp8,fp8,0,0.34858667850494385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,128,0,1,float16,fp8,0,0.3017333348592122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,2,128,0,1,fp8,fp8,0,0.2765760024388631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,128,0,1,float16,float16,0,0.3051360050837199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,128,0,1,float16,fp8,0,0.30524800221125287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,128,0,1,float16,float16,0,0.30958932638168335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,4,128,0,1,fp8,fp8,0,0.2879253427187602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,128,0,1,float16,fp8,0,0.3084160089492798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,40,8,128,0,1,fp8,fp8,0,0.29361067215601605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,128,0,1,float16,float16,0,0.1873813271522522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,128,0,1,float16,fp8,0,0.1832746664683024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,128,0,1,float16,float16,0,0.15889599919319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,40,128,0,1,fp8,fp8,0,0.18381333351135254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,128,0,1,float16,fp8,0,0.1586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,2,128,0,1,fp8,fp8,0,0.146096001068751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,128,0,1,float16,float16,0,0.16080000003178915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,128,0,1,float16,fp8,0,0.16087466478347778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,4,128,0,1,fp8,fp8,0,0.15172800421714783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,128,0,1,float16,float16,0,0.16335999965667725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,128,0,1,float16,float16,0,0.10426132877667744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,128,0,1,float16,fp8,0,0.16312533617019653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,40,8,128,0,1,fp8,fp8,0,0.15466133753458658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,128,0,1,float16,fp8,0,0.10301867127418518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,40,128,0,1,fp8,fp8,0,0.10441600282986958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,128,0,1,float16,float16,0,0.08708266417185466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,128,0,1,float16,fp8,0,0.0870293378829956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,2,128,0,1,fp8,fp8,0,0.07895466685295105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,128,0,1,float16,float16,0,0.08828266461690266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,128,0,1,float16,fp8,0,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,4,128,0,1,fp8,fp8,0,0.08142399787902832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,128,0,1,float16,float16,0,0.08938133716583252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,128,0,1,float16,fp8,0,0.08975467085838318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,40,8,128,0,1,fp8,fp8,0,0.08634666601816814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,128,0,1,float16,float16,0,0.05769599974155426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,128,0,1,float16,fp8,0,0.058037335673967995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,40,128,0,1,fp8,fp8,0,0.06061866879463196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,128,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,128,0,1,float16,fp8,0,0.052282666166623436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,2,128,0,1,fp8,fp8,0,0.045834665497144066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,128,0,1,float16,float16,0,0.05221333106358846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,128,0,1,float16,fp8,0,0.05226133267084757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,4,128,0,1,fp8,fp8,0,0.046816001335779824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,128,0,1,float16,float16,0,0.0528106689453125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,128,0,1,float16,fp8,0,0.05216533442338308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,128,0,1,float16,float16,0,0.03579200059175491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,40,8,128,0,1,fp8,fp8,0,0.04658666749795278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,128,0,1,float16,fp8,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,40,128,0,1,fp8,fp8,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,128,0,1,float16,float16,0,0.03346133232116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,128,0,1,float16,fp8,0,0.036117332677046456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,2,128,0,1,fp8,fp8,0,0.031845333675543465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,128,0,1,float16,float16,0,0.03566933423280716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,128,0,1,float16,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,4,128,0,1,fp8,fp8,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,128,0,1,float16,float16,0,0.035616000493367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,128,0,1,float16,fp8,0,0.03591466695070267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,40,8,128,0,1,fp8,fp8,0,0.03186133255561193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,128,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,40,128,0,1,fp8,fp8,0,0.025349333882331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,128,0,1,float16,float16,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,128,0,1,float16,fp8,0,0.0258240004380544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,2,128,0,1,fp8,fp8,0,0.023893333971500397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,128,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,4,128,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,128,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,128,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,40,8,128,0,1,fp8,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,128,0,1,float16,float16,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,128,0,1,float16,fp8,0,0.021456000705560047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,40,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,128,0,1,float16,float16,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,128,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,2,128,0,1,fp8,fp8,0,0.01770666614174843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,128,0,1,float16,float16,0,0.01993600030740102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,128,0,1,float16,fp8,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,4,128,0,1,fp8,fp8,0,0.01970133309563001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,128,0,1,float16,float16,0,0.0199946661790212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,128,0,1,float16,fp8,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,40,8,128,0,1,fp8,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,128,0,1,float16,fp8,0,0.01788266624013583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,40,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,2,128,0,1,float16,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,128,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,128,0,1,float16,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,4,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,40,8,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,128,0,1,float16,float16,0,0.3761333227157593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,128,0,1,fp8,fp8,0,0.34723198413848877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,2,128,0,1,float16,fp8,0,0.3774240016937256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,128,0,1,float16,float16,0,0.38226131598154706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,128,0,1,fp8,fp8,0,0.35771199067433673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,4,128,0,1,float16,fp8,0,0.3806133270263672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,128,0,1,float16,float16,0,0.3852959871292114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,128,0,1,fp8,fp8,0,0.3626399834950765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,40,8,128,0,1,float16,fp8,0,0.38556798299153644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,128,0,1,float16,float16,0,0.2257546583811442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,128,0,1,float16,fp8,0,0.22044267257054648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,128,0,1,float16,float16,0,0.19662400086720785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,40,128,0,1,fp8,fp8,0,0.21805866559346518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,128,0,1,float16,fp8,0,0.1962239940961202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,2,128,0,1,fp8,fp8,0,0.18228799104690552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,128,0,1,float16,float16,0,0.1987839937210083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,128,0,1,float16,fp8,0,0.19918400049209595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,4,128,0,1,fp8,fp8,0,0.18708799282709757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,128,0,1,float16,float16,0,0.20106667280197144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,128,0,1,fp8,fp8,0,0.1897546648979187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,40,8,128,0,1,float16,fp8,0,0.20132799943288168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,128,0,1,float16,float16,0,0.12076800068219502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,128,0,1,float16,fp8,0,0.1183519959449768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,40,128,0,1,fp8,fp8,0,0.11918933192888896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,128,0,1,float16,float16,0,0.10540800293286641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,128,0,1,float16,fp8,0,0.10528000195821126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,128,0,1,float16,float16,0,0.10602666934331258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,2,128,0,1,fp8,fp8,0,0.09502399961153667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,128,0,1,fp8,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,4,128,0,1,float16,fp8,0,0.10552533467610677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,128,0,1,float16,float16,0,0.10791466633478801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,128,0,1,float16,float16,0,0.0662613312403361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,128,0,1,float16,fp8,0,0.10616532961527507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,40,8,128,0,1,fp8,fp8,0,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,128,0,1,float16,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,40,128,0,1,fp8,fp8,0,0.06835199892520905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,128,0,1,float16,fp8,0,0.06031466523806254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,128,0,1,float16,float16,0,0.05977066854635874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,128,0,1,float16,float16,0,0.05985599756240845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,2,128,0,1,fp8,fp8,0,0.054192001620928444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,128,0,1,float16,fp8,0,0.06016000111897787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,4,128,0,1,fp8,fp8,0,0.055045331517855324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,128,0,1,float16,float16,0,0.0610346645116806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,128,0,1,float16,fp8,0,0.06025599936644236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,40,8,128,0,1,fp8,fp8,0,0.055871998270352684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,128,0,1,float16,float16,0,0.040063999593257904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,128,0,1,float16,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,40,128,0,1,fp8,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,128,0,1,float16,float16,0,0.03799466788768768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,128,0,1,float16,fp8,0,0.03751466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,2,128,0,1,fp8,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,128,0,1,float16,float16,0,0.03811733424663544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,128,0,1,float16,fp8,0,0.03783999880154928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,4,128,0,1,fp8,fp8,0,0.03558400024970373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,128,0,1,float16,float16,0,0.03806933263937632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,128,0,1,float16,fp8,0,0.038592000802357994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,40,8,128,0,1,fp8,fp8,0,0.0359253336985906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,128,0,1,float16,float16,0,0.02736533433198929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,128,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,40,128,0,1,fp8,fp8,0,0.02603200078010559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,128,0,1,float16,float16,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,128,0,1,fp8,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,2,128,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,128,0,1,float16,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,128,0,1,float16,float16,0,0.0277813325325648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,4,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,128,0,1,float16,float16,0,0.027679999669392902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,128,0,1,float16,fp8,0,0.027471999327341717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,40,8,128,0,1,fp8,fp8,0,0.025792000194390614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,128,0,1,float16,float16,0,0.02182399978240331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,128,0,1,float16,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,40,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,128,0,1,float16,fp8,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,2,128,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,128,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,128,0,1,float16,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,4,128,0,1,fp8,fp8,0,0.020400000115235645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,128,0,1,float16,float16,0,0.02292799949645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,128,0,1,float16,fp8,0,0.022944000860055287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,40,8,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,128,0,1,float16,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,128,0,1,float16,float16,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,40,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,2,128,0,1,fp8,fp8,0,0.01616000011563301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,4,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,128,0,1,float16,float16,0,0.017525333911180496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,40,8,128,0,1,fp8,fp8,0,0.01664000004529953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,40,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,2,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,128,0,1,float16,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,4,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,40,8,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,128,0,1,float16,float16,0,0.27376000086466473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,128,0,1,float16,fp8,0,0.27267734209696454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,2,128,0,1,fp8,fp8,0,0.25571199258168537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,128,0,1,float16,float16,0,0.27610133091608685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,128,0,1,fp8,fp8,0,0.26022400458653766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,4,128,0,1,float16,fp8,0,0.2760746677716573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,128,0,1,float16,float16,0,0.2787733276685079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,128,0,1,float16,fp8,0,0.2776053349177043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,40,8,128,0,1,fp8,fp8,0,0.26412800947825116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,128,0,1,float16,float16,0,0.1593653361002604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,128,0,1,fp8,fp8,0,0.15691199898719788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,128,0,1,float16,float16,0,0.1436799963315328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,40,128,0,1,float16,fp8,0,0.15718400478363037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,128,0,1,fp8,fp8,0,0.1318773329257965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,2,128,0,1,float16,fp8,0,0.14356266458829245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,128,0,1,float16,float16,0,0.14456533392270407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,128,0,1,float16,fp8,0,0.14523200194040933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,4,128,0,1,fp8,fp8,0,0.13474667072296143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,128,0,1,float16,float16,0,0.14626666903495789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,128,0,1,float16,fp8,0,0.1460853318373362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,128,0,1,float16,float16,0,0.08299200236797333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,128,0,1,float16,fp8,0,0.08292800188064575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,40,8,128,0,1,fp8,fp8,0,0.1402293344338735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,128,0,1,float16,float16,0,0.07805333534876506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,40,128,0,1,fp8,fp8,0,0.08594133456548055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,128,0,1,float16,fp8,0,0.07819733520348866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,128,0,1,float16,float16,0,0.07877333462238312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,2,128,0,1,fp8,fp8,0,0.07282133400440216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,128,0,1,float16,fp8,0,0.07874133189519246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,4,128,0,1,fp8,fp8,0,0.07240533332029979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,128,0,1,float16,float16,0,0.07898133496443431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,128,0,1,float16,fp8,0,0.07898133496443431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,40,8,128,0,1,fp8,fp8,0,0.07323200007279713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,128,0,1,float16,float16,0,0.04828799764315287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,128,0,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,40,128,0,1,fp8,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,128,0,1,float16,float16,0,0.047600001096725464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,128,0,1,float16,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,2,128,0,1,fp8,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,128,0,1,float16,float16,0,0.04615999758243561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,128,0,1,float16,float16,0,0.04791999856630961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,128,0,1,float16,fp8,0,0.04714666803677877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,4,128,0,1,fp8,fp8,0,0.04472533365090688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,128,0,1,float16,fp8,0,0.04775999983151754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,128,0,1,float16,float16,0,0.032485333581765495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,40,8,128,0,1,fp8,fp8,0,0.04535999894142151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,128,0,1,float16,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,40,128,0,1,fp8,fp8,0,0.030159999926884968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,128,0,1,float16,float16,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,128,0,1,float16,fp8,0,0.031701333820819855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,2,128,0,1,fp8,fp8,0,0.029872000217437744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,128,0,1,float16,float16,0,0.031898667414983116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,128,0,1,float16,fp8,0,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,4,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,128,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,128,0,1,float16,float16,0,0.03203733265399933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,40,8,128,0,1,fp8,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,128,0,1,float16,fp8,0,0.025936000049114227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,40,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,128,0,1,float16,float16,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,128,0,1,float16,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,2,128,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,128,0,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,128,0,1,float16,fp8,0,0.02351466566324234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,4,128,0,1,fp8,fp8,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,40,8,128,0,1,float16,fp8,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,40,128,0,1,fp8,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,128,0,1,float16,float16,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,2,128,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,4,128,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,128,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,40,8,128,0,1,fp8,fp8,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,40,128,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,4,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,128,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,40,8,128,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,128,0,1,float16,float16,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,40,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,128,0,1,float16,float16,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,2,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,128,0,1,float16,float16,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,4,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,40,8,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,128,0,1,float16,float16,0,0.22261865933736166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,128,0,1,float16,fp8,0,0.22246932983398438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,2,128,0,1,fp8,fp8,0,0.20994667212168375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,128,0,1,float16,float16,0,0.2228053410847982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,128,0,1,float16,fp8,0,0.2226933240890503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,4,128,0,1,fp8,fp8,0,0.21254400412241617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,128,0,1,fp8,fp8,0,0.21589332818984985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,128,0,1,float16,fp8,0,0.22426666816075644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,40,8,128,0,1,float16,float16,0,0.22303466002146402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,128,0,1,float16,float16,0,0.12467199563980103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,128,0,1,float16,fp8,0,0.12361066540082295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,40,128,0,1,fp8,fp8,0,0.1253493328889211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,128,0,1,float16,float16,0,0.11734933654467265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,128,0,1,fp8,fp8,0,0.11151466766993205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,2,128,0,1,float16,fp8,0,0.1184213360150655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,128,0,1,float16,float16,0,0.1179200013478597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,128,0,1,float16,fp8,0,0.11776533722877502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,4,128,0,1,fp8,fp8,0,0.11141332983970642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,128,0,1,float16,fp8,0,0.1179093321164449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,128,0,1,fp8,fp8,0,0.1116480032602946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,40,8,128,0,1,float16,float16,0,0.11849600076675415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,128,0,1,float16,float16,0,0.06650666892528534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,128,0,1,float16,fp8,0,0.06667733192443848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,40,128,0,1,fp8,fp8,0,0.06464000046253204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,128,0,1,float16,fp8,0,0.06483200192451477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,128,0,1,float16,float16,0,0.06594666838645935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,2,128,0,1,fp8,fp8,0,0.06411733229955037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,128,0,1,float16,float16,0,0.0662773350874583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,128,0,1,float16,fp8,0,0.06700799862543742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,4,128,0,1,fp8,fp8,0,0.06454400221506755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,128,0,1,float16,float16,0,0.06604800124963124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,128,0,1,float16,fp8,0,0.06727999945481618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,40,8,128,0,1,fp8,fp8,0,0.0646666685740153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,128,0,1,float16,float16,0,0.04154666761557261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,128,0,1,float16,fp8,0,0.04248533149560293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,40,128,0,1,fp8,fp8,0,0.04041599979003271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,128,0,1,float16,float16,0,0.04161066561937332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,128,0,1,float16,fp8,0,0.04195733368396759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,2,128,0,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,128,0,1,float16,float16,0,0.04168533285458883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,128,0,1,fp8,fp8,0,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,4,128,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,128,0,1,float16,float16,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,128,0,1,float16,fp8,0,0.041802664597829185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,40,8,128,0,1,fp8,fp8,0,0.04012800008058548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,128,0,1,float16,fp8,0,0.029546665648619335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,128,0,1,float16,float16,0,0.027637332677841187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,40,128,0,1,fp8,fp8,0,0.0277813325325648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,128,0,1,float16,fp8,0,0.02731200059254964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,2,128,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,128,0,1,float16,float16,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,128,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,4,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,128,0,1,float16,float16,0,0.027834666272004444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,128,0,1,float16,fp8,0,0.027647999425729115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,40,8,128,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,128,0,1,float16,float16,0,0.021935999393463135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,128,0,1,float16,fp8,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,40,128,0,1,fp8,fp8,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,128,0,1,float16,float16,0,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,128,0,1,float16,fp8,0,0.021642667551835377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,2,128,0,1,fp8,fp8,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,128,0,1,float16,float16,0,0.02164799968401591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,128,0,1,float16,fp8,0,0.021695998807748158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,4,128,0,1,fp8,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,128,0,1,float16,float16,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,128,0,1,float16,fp8,0,0.021407999098300934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,40,8,128,0,1,fp8,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,128,0,1,float16,float16,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,128,0,1,float16,fp8,0,0.017642666896184284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,40,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,128,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,2,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,4,128,0,1,fp8,fp8,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,128,0,1,float16,float16,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,40,8,128,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,128,0,1,float16,float16,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,40,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,2,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,128,0,1,float16,float16,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,4,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,128,0,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,40,8,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,128,0,1,float16,float16,0,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,128,0,1,float16,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,40,128,0,1,fp8,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,128,0,1,float16,float16,0,0.016314666718244553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,2,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,128,0,1,float16,float16,0,0.015775999675194424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,4,128,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,128,0,1,float16,float16,0,0.01595199977358182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,128,0,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,40,8,128,0,1,fp8,fp8,0,0.0163680004576842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,128,0,1,float16,float16,0,0.18768533070882162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,128,0,1,float16,fp8,0,0.1876586675643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,2,128,0,1,fp8,fp8,0,0.17114667097727457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,128,0,1,float16,float16,0,0.18772266308466592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,128,0,1,fp8,fp8,0,0.1710240046183268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,128,0,1,float16,float16,0,0.18754667043685913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,4,128,0,1,float16,fp8,0,0.18829333782196045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,0,0.10121066371599834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,128,0,1,float16,fp8,0,0.1891146699587504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,0,0.10095466176668803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,40,8,128,0,1,fp8,fp8,0,0.17137066523234049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,40,128,0,1,fp8,fp8,0,0.09326933821042378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,128,0,1,float16,float16,0,0.10102933645248413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,128,0,1,fp8,fp8,0,0.09303999940554301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,2,128,0,1,float16,fp8,0,0.10149866342544556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,128,0,1,float16,float16,0,0.09934932986895244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,128,0,1,float16,fp8,0,0.09950400392214458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,128,0,1,float16,float16,0,0.10125333070755005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,4,128,0,1,fp8,fp8,0,0.09299199779828389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,128,0,1,float16,fp8,0,0.09966933727264404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,0,0.056314667065938316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,0,0.05649066468079885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,40,8,128,0,1,fp8,fp8,0,0.09150399764378865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,40,128,0,1,fp8,fp8,0,0.05295999844868978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,128,0,1,float16,float16,0,0.05641599992911021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,128,0,1,float16,fp8,0,0.056421334544817604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,2,128,0,1,fp8,fp8,0,0.05429333448410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,128,0,1,float16,float16,0,0.05658133327960968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,128,0,1,float16,fp8,0,0.056618665655454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,4,128,0,1,fp8,fp8,0,0.05402666827042898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,128,0,1,float16,float16,0,0.0562720000743866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,128,0,1,float16,fp8,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,40,8,128,0,1,fp8,fp8,0,0.05379199981689453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,40,128,0,1,fp8,fp8,0,0.03521066655715307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,128,0,1,float16,fp8,0,0.03616533428430557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,128,0,1,float16,float16,0,0.03748800108830134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,2,128,0,1,fp8,fp8,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,128,0,1,float16,float16,0,0.036271999279658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,128,0,1,float16,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,4,128,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,128,0,1,float16,float16,0,0.03602666656176249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,128,0,1,float16,fp8,0,0.037418665985266365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,40,8,128,0,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,40,128,0,1,fp8,fp8,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,128,0,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,2,128,0,1,fp8,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,128,0,1,float16,float16,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,128,0,1,float16,float16,0,0.02720000098148982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,128,0,1,fp8,fp8,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,4,128,0,1,float16,fp8,0,0.02571733295917511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,128,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,40,8,128,0,1,fp8,fp8,0,0.02587199956178665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,40,128,0,1,fp8,fp8,0,0.021013334393501282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,128,0,1,float16,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,128,0,1,float16,float16,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,2,128,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,128,0,1,float16,fp8,0,0.021914665897687275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,128,0,1,float16,fp8,0,0.021701333423455555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,4,128,0,1,fp8,fp8,0,0.019152000546455383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,128,0,1,float16,float16,0,0.020879998803138733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,0,0.01736533393462499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,40,8,128,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,40,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,128,0,1,float16,float16,0,0.018976000448067982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,2,128,0,1,fp8,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,128,0,1,float16,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,4,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,40,8,128,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,40,128,0,1,fp8,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,128,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,2,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,4,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,40,8,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,40,128,0,1,fp8,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,128,0,1,float16,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,2,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,4,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,40,8,128,0,1,fp8,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,128,0,1,fp8,fp8,0,8.828213373819986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,float16,0,11.72659683227539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,1,128,0,1,float16,fp8,0,11.40179189046224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,float16,0,11.442858378092447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,128,0,1,float16,fp8,0,11.423786163330078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,2,128,0,1,fp8,fp8,0,8.819594701131185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,float16,0,11.568517049153646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,128,0,1,float16,fp8,0,11.745530446370443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,4,128,0,1,fp8,fp8,0,8.940698623657227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,float16,0,11.761098225911459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,128,0,1,fp8,fp8,0,9.032650629679361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,32,8,128,0,1,float16,fp8,0,11.530068715413412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,float16,0,6.2135359446207685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,128,0,1,fp8,fp8,0,4.7355092366536455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,32,128,0,1,float16,fp8,0,6.3415788014729815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,float16,0,5.8821760813395185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,128,0,1,float16,fp8,0,5.882991790771484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,1,128,0,1,fp8,fp8,0,4.483104070027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,float16,0,5.606954574584961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,128,0,1,float16,fp8,0,5.933493296305339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,2,128,0,1,fp8,fp8,0,4.495519955952962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,float16,0,5.8669281005859375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,128,0,1,float16,fp8,0,6.028421401977539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,4,128,0,1,fp8,fp8,0,4.497946739196777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,float16,0,5.746117273966472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,float16,0,2.9996747970581055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,128,0,1,float16,fp8,0,3.03053347269694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,128,0,1,fp8,fp8,0,4.519311904907227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,32,8,128,0,1,float16,fp8,0,5.708554585774739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,32,128,0,1,fp8,fp8,0,2.4849653244018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,float16,0,2.9017759958902993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,128,0,1,float16,fp8,0,2.8731040954589844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,1,128,0,1,fp8,fp8,0,2.347562630971273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,float16,0,2.8745654424031577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,128,0,1,float16,fp8,0,2.9445387522379556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,2,128,0,1,fp8,fp8,0,2.3612000147501626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,float16,0,2.871333440144857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,128,0,1,float16,fp8,0,2.973578770955404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,4,128,0,1,fp8,fp8,0,2.370602607727051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,float16,0,2.8992640177408853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,128,0,1,float16,fp8,0,2.886768023173014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,32,8,128,0,1,fp8,fp8,0,2.381114641825358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,float16,0,1.6222346623738606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,128,0,1,float16,fp8,0,1.607968012491862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,32,128,0,1,fp8,fp8,0,1.3525865872701008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,float16,0,1.5481866200764973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,128,0,1,float16,fp8,0,1.5279146830240886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,1,128,0,1,fp8,fp8,0,1.3117492993672688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,float16,0,1.5250026384989421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,128,0,1,fp8,fp8,0,1.3029013474782307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,2,128,0,1,float16,fp8,0,1.5856107076009114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,float16,0,1.5445067087809246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,128,0,1,float16,fp8,0,1.5546773274739583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,4,128,0,1,fp8,fp8,0,1.3093706766764324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,float16,0,1.5847519238789876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,128,0,1,float16,fp8,0,1.5617012977600098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,32,8,128,0,1,fp8,fp8,0,1.3789386749267578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,128,0,1,fp8,fp8,0,5.259546597798665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,float16,0,6.761557261149089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,1,128,0,1,float16,fp8,0,6.761797587076823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,float16,0,6.761423746744792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,128,0,1,fp8,fp8,0,5.300314585367839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,2,128,0,1,float16,fp8,0,6.637509028116862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,float16,0,6.866352081298828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,128,0,1,float16,fp8,0,6.9068959554036455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,4,128,0,1,fp8,fp8,0,5.332709312438965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,float16,0,6.820320129394531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,128,0,1,float16,fp8,0,6.975311915079753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,32,8,128,0,1,fp8,fp8,0,5.327354749043782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,float16,0,3.5821119944254556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,128,0,1,float16,fp8,0,3.540442784627279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,32,128,0,1,fp8,fp8,0,2.8805227279663086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,float16,0,3.2451467514038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,128,0,1,float16,fp8,0,3.420351982116699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,1,128,0,1,fp8,fp8,0,2.722293217976888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,float16,0,3.312272071838379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,128,0,1,float16,fp8,0,3.306730588277181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,2,128,0,1,fp8,fp8,0,2.728266716003418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,float16,0,3.278623898824056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,128,0,1,fp8,fp8,0,2.721712112426758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,4,128,0,1,float16,fp8,0,3.4839839935302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,float16,0,3.494826634724935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,float16,0,1.8547840118408203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,128,0,1,float16,fp8,0,1.8432106971740723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,128,0,1,fp8,fp8,0,2.7458985646565757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,32,8,128,0,1,float16,fp8,0,3.4209280014038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,32,128,0,1,fp8,fp8,0,1.63702392578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,float16,0,1.727893352508545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,128,0,1,float16,fp8,0,1.706234614054362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,1,128,0,1,fp8,fp8,0,1.5149493217468262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,float16,0,1.7346080144246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,128,0,1,fp8,fp8,0,1.5475306510925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,2,128,0,1,float16,fp8,0,1.7183945973714192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,float16,0,1.7292906443277996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,128,0,1,float16,fp8,0,1.7527519861857097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,4,128,0,1,fp8,fp8,0,1.4690292676289876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,float16,0,1.7636267344156902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,128,0,1,float16,fp8,0,1.7696960767110188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,float16,0,1.0035200119018555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,32,8,128,0,1,fp8,fp8,0,1.5796160697937012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,128,0,1,float16,fp8,0,1.0380213260650635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,32,128,0,1,fp8,fp8,0,0.8659413655598959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,float16,0,0.9382080237070719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,128,0,1,float16,fp8,0,0.9512746334075928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,1,128,0,1,fp8,fp8,0,0.8236533006032308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,float16,0,0.9528000354766846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,128,0,1,float16,fp8,0,0.9532907009124756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,2,128,0,1,fp8,fp8,0,0.8269333044687907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,float16,0,0.96014936765035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,128,0,1,float16,fp8,0,0.9626560211181641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,4,128,0,1,fp8,fp8,0,0.8269973595937093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,float16,0,0.9702026844024658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,128,0,1,float16,fp8,0,0.9714986483256022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,32,8,128,0,1,fp8,fp8,0,0.8343146642049154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,128,0,1,fp8,fp8,0,3.8358774185180664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,float16,0,4.595888137817383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,1,128,0,1,float16,fp8,0,4.934000015258789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,float16,0,4.793295860290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,128,0,1,fp8,fp8,0,3.84991455078125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,2,128,0,1,float16,fp8,0,4.74452273050944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,float16,0,4.943754514058431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,128,0,1,fp8,fp8,0,3.852362632751465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,4,128,0,1,float16,fp8,0,4.8283999760945635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,float16,0,4.911594708760579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,float16,0,2.5674452781677246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,128,0,1,float16,fp8,0,5.025162696838379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,32,8,128,0,1,fp8,fp8,0,3.8904425303141275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,128,0,1,float16,fp8,0,2.620405356089274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,32,128,0,1,fp8,fp8,0,2.1329387029012046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,float16,0,2.380512078603109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,128,0,1,float16,fp8,0,2.376410643259684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,1,128,0,1,fp8,fp8,0,1.994117259979248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,float16,0,2.441962718963623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,128,0,1,float16,fp8,0,2.4812533060709634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,2,128,0,1,fp8,fp8,0,1.9922720591227214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,float16,0,2.4004906018575034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,128,0,1,float16,fp8,0,2.465328057607015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,4,128,0,1,fp8,fp8,0,2.005882740020752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,float16,0,2.4220639864603677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,128,0,1,float16,fp8,0,2.451632022857666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,float16,0,1.3696799278259277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,32,8,128,0,1,fp8,fp8,0,2.026847998301188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,128,0,1,float16,fp8,0,1.3706132570902507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,32,128,0,1,fp8,fp8,0,1.2660746574401855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,float16,0,1.257322629292806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,128,0,1,fp8,fp8,0,1.1075092951456706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,1,128,0,1,float16,fp8,0,1.2689812978108723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,float16,0,1.2721333503723145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,128,0,1,float16,fp8,0,1.2715306282043457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,2,128,0,1,fp8,fp8,0,1.078927993774414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,float16,0,1.2866933345794678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,128,0,1,float16,fp8,0,1.284010648727417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,4,128,0,1,fp8,fp8,0,1.1028052965799968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,float16,0,1.2769546508789062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,128,0,1,float16,fp8,0,1.304538647333781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,float16,0,0.7629493077596029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,32,8,128,0,1,fp8,fp8,0,1.095952033996582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,128,0,1,float16,fp8,0,0.7682720025380453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,32,128,0,1,fp8,fp8,0,0.6532853444417318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,float16,0,0.7079253196716309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,128,0,1,float16,fp8,0,0.7136053244272867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,1,128,0,1,fp8,fp8,0,0.6207199891408285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,float16,0,0.7156213124593099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,128,0,1,float16,fp8,0,0.7150186697642008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,2,128,0,1,fp8,fp8,0,0.6240479946136475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,float16,0,0.714949369430542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,128,0,1,float16,fp8,0,0.7282240390777588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,4,128,0,1,fp8,fp8,0,0.6244959831237793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,float16,0,0.7295573552449545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,128,0,1,fp8,fp8,0,0.6296799977620443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,32,8,128,0,1,float16,fp8,0,0.7231573263804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,128,0,1,fp8,fp8,0,5.196144104003906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,float16,0,6.6268056233723955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,1,128,0,1,float16,fp8,0,6.404245376586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,float16,0,6.528282801310222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,128,0,1,fp8,fp8,0,5.219498634338379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,2,128,0,1,float16,fp8,0,6.61073621114095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,float16,0,6.434272130330403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,128,0,1,float16,fp8,0,6.34443728129069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,4,128,0,1,fp8,fp8,0,5.232607841491699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,float16,0,6.590853373209636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,128,0,1,fp8,fp8,0,5.294346809387207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,32,8,128,0,1,float16,fp8,0,6.506101608276367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,float16,0,3.4889599482218423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,128,0,1,fp8,fp8,0,2.859615961710612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,32,128,0,1,float16,fp8,0,3.5289599100748696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,float16,0,3.218730608622233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,128,0,1,float16,fp8,0,3.192357381184896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,1,128,0,1,fp8,fp8,0,2.6314239501953125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,float16,0,3.1878932317097983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,128,0,1,float16,fp8,0,3.2490666707356772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,2,128,0,1,fp8,fp8,0,2.6448373794555664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,float16,0,3.18067200978597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,128,0,1,float16,fp8,0,3.2774346669514975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,4,128,0,1,fp8,fp8,0,2.6634507179260254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,float16,0,3.2847518920898438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,float16,0,1.7801334063212078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,128,0,1,float16,fp8,0,3.3073708216349282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,32,8,128,0,1,fp8,fp8,0,2.6992371877034507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,128,0,1,float16,fp8,0,1.826709270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,32,128,0,1,fp8,fp8,0,1.571232000986735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,float16,0,1.6304213205973308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,128,0,1,float16,fp8,0,1.636074701944987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,1,128,0,1,fp8,fp8,0,1.3748373985290527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,float16,0,1.6329654057820637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,128,0,1,float16,fp8,0,1.6511306762695312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,2,128,0,1,fp8,fp8,0,1.4226773579915364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,float16,0,1.6398773193359375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,128,0,1,fp8,fp8,0,1.382314682006836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,4,128,0,1,float16,fp8,0,1.647706667582194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,float16,0,1.6713013648986816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,128,0,1,float16,fp8,0,1.6696906089782715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,float16,0,0.9693120320638021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,32,8,128,0,1,fp8,fp8,0,1.4037866592407227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,128,0,1,float16,fp8,0,0.973370631535848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,32,128,0,1,fp8,fp8,0,0.8604426383972168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,float16,0,0.8716959953308105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,128,0,1,float16,fp8,0,0.871071974436442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,1,128,0,1,fp8,fp8,0,0.7605813344319662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,float16,0,0.8796373208363851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,128,0,1,float16,fp8,0,0.8757332960764567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,2,128,0,1,fp8,fp8,0,0.7624853452046713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,float16,0,0.8873279889424642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,128,0,1,float16,fp8,0,0.8801653385162354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,4,128,0,1,fp8,fp8,0,0.7650880018870035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,float16,0,0.8958453337351481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,128,0,1,float16,fp8,0,0.8898133436838785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,32,8,128,0,1,fp8,fp8,0,0.769050677617391
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,float16,0,0.5500479936599731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,128,0,1,float16,fp8,0,0.5517706473668417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,32,128,0,1,fp8,fp8,0,0.476032018661499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,float16,0,0.4983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,128,0,1,float16,fp8,0,0.49533331394195557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,1,128,0,1,fp8,fp8,0,0.44894933700561523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,float16,0,0.5015573501586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,128,0,1,float16,fp8,0,0.49930667877197266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,2,128,0,1,fp8,fp8,0,0.45092801253000897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,float16,0,0.5102880001068115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,128,0,1,fp8,fp8,0,0.4500480095545451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,4,128,0,1,float16,fp8,0,0.5097546577453613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,float16,0,0.517029325167338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,128,0,1,fp8,fp8,0,0.45351465543111164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,32,8,128,0,1,float16,fp8,0,0.5177706480026245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,128,0,1,fp8,fp8,0,3.2301225662231445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,float16,0,3.907519976298014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,1,128,0,1,float16,fp8,0,3.8290878931681314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,float16,0,3.8868373235066733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,128,0,1,fp8,fp8,0,3.249786694844564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,2,128,0,1,float16,fp8,0,3.8706347147623696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,float16,0,3.9375893274943032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,128,0,1,float16,fp8,0,3.9672746658325195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,4,128,0,1,fp8,fp8,0,3.27509339650472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,float16,0,3.913466771443685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,128,0,1,fp8,fp8,0,3.3144054412841797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,32,8,128,0,1,float16,fp8,0,3.9805386861165366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,float16,0,2.1774293581644693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,128,0,1,fp8,fp8,0,1.8352905909220378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,32,128,0,1,float16,fp8,0,2.213850657145182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,float16,0,1.9377652804056804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,128,0,1,float16,fp8,0,1.958672046661377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,1,128,0,1,fp8,fp8,0,1.6557599703470867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,float16,0,1.9293492635091145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,128,0,1,float16,fp8,0,1.9761759440104167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,2,128,0,1,fp8,fp8,0,1.7397759755452473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,float16,0,1.9674720764160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,128,0,1,float16,fp8,0,1.9765226046244304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,4,128,0,1,fp8,fp8,0,1.7594134012858074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,float16,0,1.990026632944743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,float16,0,1.1337227026621501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,128,0,1,float16,fp8,0,2.022245407104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,32,8,128,0,1,fp8,fp8,0,1.692069371541341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,128,0,1,float16,fp8,0,1.1586026350657146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,32,128,0,1,fp8,fp8,0,0.9674399693806967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,float16,0,1.0192906856536865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,128,0,1,fp8,fp8,0,0.8899093468983968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,1,128,0,1,float16,fp8,0,1.0102240244547527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,float16,0,1.0223039786020915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,128,0,1,float16,fp8,0,1.0248479843139648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,2,128,0,1,fp8,fp8,0,0.8849973678588867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,float16,0,1.0239946842193604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,128,0,1,float16,fp8,0,1.0339466730753581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,4,128,0,1,fp8,fp8,0,0.8866026401519775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,float16,0,1.0439093112945557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,128,0,1,float16,fp8,0,1.0433599948883057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,float16,0,0.6232266823450724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,32,8,128,0,1,fp8,fp8,0,0.8980693022410074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,128,0,1,float16,fp8,0,0.6325759887695312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,32,128,0,1,fp8,fp8,0,0.5435200134913126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,float16,0,0.5594026645024618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,128,0,1,fp8,fp8,0,0.4927146832148234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,1,128,0,1,float16,fp8,0,0.5582026640574137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,float16,0,0.5600853363672892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,128,0,1,fp8,fp8,0,0.4946719805399577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,2,128,0,1,float16,fp8,0,0.5635786851247152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,float16,0,0.5661760171254476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,128,0,1,float16,fp8,0,0.5655999978383383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,4,128,0,1,fp8,fp8,0,0.49885332584381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,float16,0,0.5746133327484131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,128,0,1,float16,fp8,0,0.5779199997584025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,32,8,128,0,1,fp8,fp8,0,0.501690665880839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,float16,0,0.367573340733846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,128,0,1,float16,fp8,0,0.3721546729405721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,32,128,0,1,fp8,fp8,0,0.32257066170374554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,float16,0,0.3229440053304036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,128,0,1,float16,fp8,0,0.32715733846028644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,1,128,0,1,fp8,fp8,0,0.297760009765625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,float16,0,0.3264960050582886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,128,0,1,float16,fp8,0,0.32264000177383423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,2,128,0,1,fp8,fp8,0,0.2999519904454549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,float16,0,0.32860267162323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,128,0,1,float16,fp8,0,0.33032000064849854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,4,128,0,1,fp8,fp8,0,0.3017173409461975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,float16,0,0.33906133969624835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,128,0,1,float16,fp8,0,0.3395306666692098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,32,8,128,0,1,fp8,fp8,0,0.30587200323740643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,128,0,1,fp8,fp8,0,3.402928034464518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,float16,0,3.920586585998535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,1,128,0,1,float16,fp8,0,3.9830452601114907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,128,0,1,fp8,fp8,0,3.4343786239624023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,float16,0,3.962714513142904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,2,128,0,1,float16,fp8,0,3.958575884501139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,128,0,1,fp8,fp8,0,3.447733243306478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,fp8,0,4.031951904296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,4,128,0,1,float16,float16,0,3.9970827102661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,float16,0,4.141477266947429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,float16,0,2.277263959248861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,128,0,1,float16,fp8,0,2.2927680015563965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,128,0,1,fp8,fp8,0,3.506389300028483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,32,8,128,0,1,float16,fp8,0,4.142565409342448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,32,128,0,1,fp8,fp8,0,1.9501439730326335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,float16,0,1.9626827239990234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,128,0,1,float16,fp8,0,1.9812053044637044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,1,128,0,1,fp8,fp8,0,1.7092159589131672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,float16,0,1.9813332557678223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,128,0,1,float16,fp8,0,1.9865226745605469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,2,128,0,1,fp8,fp8,0,1.7648213704427083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,float16,0,1.9964159329732258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,128,0,1,float16,fp8,0,2.021392027537028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,4,128,0,1,fp8,fp8,0,1.7338879903157551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,float16,0,2.0545867284139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,128,0,1,float16,fp8,0,2.0662986437479653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,float16,0,1.1674826939900715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,32,8,128,0,1,fp8,fp8,0,1.76473601659139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,128,0,1,float16,fp8,0,1.195141315460205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,32,128,0,1,fp8,fp8,0,1.0149119695027669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,float16,0,1.0094559987386067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,128,0,1,fp8,fp8,0,0.9014399846394857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,1,128,0,1,float16,fp8,0,1.0182560284932454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,float16,0,1.026410659154256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,128,0,1,float16,fp8,0,1.0285173257191975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,2,128,0,1,fp8,fp8,0,0.8988693555196127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,float16,0,1.032528003056844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,128,0,1,float16,fp8,0,1.0381546815236409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,4,128,0,1,fp8,fp8,0,0.9008426666259766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,float16,0,1.0545012950897217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,float16,0,0.6228106816609701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,128,0,1,float16,fp8,0,1.0623359680175781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,32,8,128,0,1,fp8,fp8,0,0.9181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,128,0,1,float16,fp8,0,0.636949340502421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,32,128,0,1,fp8,fp8,0,0.543013334274292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,float16,0,0.5439200003941854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,128,0,1,float16,fp8,0,0.5457706848780314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,1,128,0,1,fp8,fp8,0,0.48416535059611004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,float16,0,0.5475466648737589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,128,0,1,float16,fp8,0,0.5555733442306519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,2,128,0,1,fp8,fp8,0,0.48603200912475586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,float16,0,0.5548373460769653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,128,0,1,float16,fp8,0,0.5536746581395467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,4,128,0,1,fp8,fp8,0,0.49027733008066815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,float16,0,0.563098669052124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,128,0,1,float16,fp8,0,0.5686933199564616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,float16,0,0.35155733426411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,32,8,128,0,1,fp8,fp8,0,0.4957173268000285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,128,0,1,float16,fp8,0,0.3575626611709595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,32,128,0,1,fp8,fp8,0,0.30990399916966754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,float16,0,0.30264532566070557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,128,0,1,fp8,fp8,0,0.2789173324902852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,1,128,0,1,float16,fp8,0,0.3012106617291768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,float16,0,0.30750399827957153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,128,0,1,float16,fp8,0,0.304965337117513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,2,128,0,1,fp8,fp8,0,0.2813760042190552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,float16,0,0.31042667229970294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,128,0,1,float16,fp8,0,0.3129280010859172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,4,128,0,1,fp8,fp8,0,0.2836373249689738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,float16,0,0.31940799951553345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,128,0,1,float16,fp8,0,0.3192533254623413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,32,8,128,0,1,fp8,fp8,0,0.2879306674003601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,float16,0,0.2174826661745707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,128,0,1,float16,fp8,0,0.21869866053263345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,32,128,0,1,fp8,fp8,0,0.19267199436823526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,float16,0,0.1877066691716512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,128,0,1,float16,fp8,0,0.18713066975275675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,1,128,0,1,fp8,fp8,0,0.1723733345667521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,float16,0,0.18638400236765543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,128,0,1,float16,fp8,0,0.18859734137852988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,2,128,0,1,fp8,fp8,0,0.17202667395273843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,float16,0,0.1867306629816691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,128,0,1,fp8,fp8,0,0.17457065979639688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,4,128,0,1,float16,fp8,0,0.1874879995981852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,float16,0,0.1888373295466105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,128,0,1,fp8,fp8,0,0.17949867248535156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,32,8,128,0,1,float16,fp8,0,0.1909173329671224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,float16,0,2.494501272837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,128,0,1,fp8,fp8,0,2.2092159589131675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,1,128,0,1,float16,fp8,0,2.5185707410176597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,float16,0,2.5276853243509927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,128,0,1,fp8,fp8,0,2.224266688028971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,2,128,0,1,float16,fp8,0,2.523205280303955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,float16,0,2.5573973655700684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,128,0,1,float16,fp8,0,2.567621390024821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,4,128,0,1,fp8,fp8,0,2.241290728251139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,float16,0,2.6041599909464517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,128,0,1,fp8,fp8,0,2.2857866287231445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,32,8,128,0,1,float16,fp8,0,2.63374392191569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,float16,0,1.4780373573303223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,128,0,1,fp8,fp8,0,1.2873546282450359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,32,128,0,1,float16,fp8,0,1.4909440676371257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,float16,0,1.2751306692759197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,128,0,1,fp8,fp8,0,1.122213363647461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,1,128,0,1,float16,fp8,0,1.2755680084228516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,float16,0,1.2820160388946533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,128,0,1,fp8,fp8,0,1.1280799706776936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,2,128,0,1,float16,fp8,0,1.2808266480763753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,float16,0,1.2964426676432292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,128,0,1,fp8,fp8,0,1.134661356608073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,4,128,0,1,float16,fp8,0,1.301258643468221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,float16,0,1.3302773634592693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,float16,0,0.769536018371582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,128,0,1,fp8,fp8,0,1.1590080261230469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,32,8,128,0,1,float16,fp8,0,1.3286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,128,0,1,fp8,fp8,0,0.6773866812388102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,float16,0,0.6627413431803385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,32,128,0,1,float16,fp8,0,0.7847680250803629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,128,0,1,fp8,fp8,0,0.5921119848887125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,1,128,0,1,float16,fp8,0,0.6680266857147217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,float16,0,0.6666346788406372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,128,0,1,float16,fp8,0,0.6748212973276774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,2,128,0,1,fp8,fp8,0,0.5944746732711792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,float16,0,0.6786346435546875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,128,0,1,float16,fp8,0,0.6768480141957601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,4,128,0,1,fp8,fp8,0,0.5991040070851644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,float16,0,0.4319946765899658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,float16,0,0.6952213446299235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,128,0,1,float16,fp8,0,0.7012800375620524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,32,8,128,0,1,fp8,fp8,0,0.6083360115687052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,128,0,1,fp8,fp8,0,0.3703999916712443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,32,128,0,1,float16,fp8,0,0.42484267552693683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,float16,0,0.3607199986775716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,128,0,1,float16,fp8,0,0.3582293192545573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,1,128,0,1,fp8,fp8,0,0.3267093300819397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,fp8,0,0.36693334579467773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,128,0,1,float16,float16,0,0.362613320350647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,2,128,0,1,fp8,fp8,0,0.32862399021784466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,float16,0,0.36932798226674396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,128,0,1,float16,fp8,0,0.3715680042902629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,4,128,0,1,fp8,fp8,0,0.33085866769154865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,float16,0,0.37588266531626385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,float16,0,0.24205867449442545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,128,0,1,float16,fp8,0,0.24447466929753622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,128,0,1,fp8,fp8,0,0.33602134386698407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,32,8,128,0,1,float16,fp8,0,0.37965333461761475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,32,128,0,1,fp8,fp8,0,0.21581866343816122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,float16,0,0.203274667263031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,128,0,1,fp8,fp8,0,0.1898720065752665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,1,128,0,1,float16,fp8,0,0.20383999745051065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,fp8,0,0.2048799991607666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,128,0,1,float16,float16,0,0.20475733280181885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,2,128,0,1,fp8,fp8,0,0.19185600678126016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,float16,0,0.20828799406687418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,128,0,1,float16,fp8,0,0.20969067017237344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,4,128,0,1,fp8,fp8,0,0.1943946679433187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,float16,0,0.21482133865356445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,128,0,1,float16,fp8,0,0.21570666631062826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,32,8,128,0,1,fp8,fp8,0,0.19831466674804688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,float16,0,0.14843733112017313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,128,0,1,float16,fp8,0,0.14945066968599954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,32,128,0,1,fp8,fp8,0,0.13716800014177957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,float16,0,0.13025599718093872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,128,0,1,float16,fp8,0,0.13157866398493448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,1,128,0,1,fp8,fp8,0,0.12353066603342693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,float16,0,0.13171733419100443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,128,0,1,float16,fp8,0,0.13190399607022604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,2,128,0,1,fp8,fp8,0,0.12392533818880717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,float16,0,0.1304213305314382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,128,0,1,float16,fp8,0,0.1316106617450714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,4,128,0,1,fp8,fp8,0,0.12489599982897441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,float16,0,0.13103999694188437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,128,0,1,float16,fp8,0,0.1341919998327891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,32,8,128,0,1,fp8,fp8,0,0.12402133146921794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,128,0,1,fp8,fp8,0,2.4827146530151367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,float16,0,2.8129440943400064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,1,128,0,1,float16,fp8,0,2.8296906153361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,float16,0,2.8084640502929688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,128,0,1,fp8,fp8,0,2.502789338429769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,2,128,0,1,float16,fp8,0,2.8414827982584634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,float16,0,2.8594347635904946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,128,0,1,float16,fp8,0,2.8543678919474282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,4,128,0,1,fp8,fp8,0,2.52457062403361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,float16,0,2.9277013142903647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,128,0,1,fp8,fp8,0,2.5846187273661294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,32,8,128,0,1,float16,fp8,0,2.959792137145996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,float16,0,1.6435680389404297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,128,0,1,fp8,fp8,0,1.4574346542358398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,32,128,0,1,float16,fp8,0,1.6611679395039876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,float16,0,1.401301383972168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,128,0,1,float16,fp8,0,1.4048320452372234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,1,128,0,1,fp8,fp8,0,1.2462879816691081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,float16,0,1.4106666247049968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,128,0,1,float16,fp8,0,1.4190346399943035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,2,128,0,1,fp8,fp8,0,1.256981372833252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,float16,0,1.419904073079427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,128,0,1,fp8,fp8,0,1.2667413552602131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,4,128,0,1,float16,fp8,0,1.4324746131896973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,float16,0,1.4673867225646973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,float16,0,0.8419199784596761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,128,0,1,float16,fp8,0,1.4713706970214844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,32,8,128,0,1,fp8,fp8,0,1.2955786387125652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,128,0,1,float16,fp8,0,0.8570666313171387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,32,128,0,1,fp8,fp8,0,0.7509013017018636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,float16,0,0.7209546566009521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,128,0,1,float16,fp8,0,0.7227839628855387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,1,128,0,1,fp8,fp8,0,0.644378662109375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,float16,0,0.7260639667510986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,128,0,1,float16,fp8,0,0.7309760252634684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,2,128,0,1,fp8,fp8,0,0.6471039851506551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,float16,0,0.7347893714904785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,128,0,1,float16,fp8,0,0.739408016204834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,4,128,0,1,fp8,fp8,0,0.6524693171183268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,float16,0,0.7566239833831787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,128,0,1,float16,fp8,0,0.7611680030822754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,32,8,128,0,1,fp8,fp8,0,0.6683200200398763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,float16,0,0.4453546603520711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,128,0,1,float16,fp8,0,0.4543573458989461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,float16,0,0.3795359929402669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,32,128,0,1,fp8,fp8,0,0.40110401312510174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,128,0,1,float16,fp8,0,0.3832799990971883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,1,128,0,1,fp8,fp8,0,0.3468693494796753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,float16,0,0.38286399841308594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,128,0,1,float16,fp8,0,0.38866134484608966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,2,128,0,1,fp8,fp8,0,0.34781332810719806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,float16,0,0.38841601212819415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,128,0,1,float16,fp8,0,0.3918986717859904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,4,128,0,1,fp8,fp8,0,0.35122132301330566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,float16,0,0.3998986482620239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,128,0,1,float16,fp8,0,0.40458667278289795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,float16,0,0.24684800704320273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,32,8,128,0,1,fp8,fp8,0,0.3585333426793416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,128,0,1,float16,fp8,0,0.25231999158859253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,float16,0,0.20640534162521362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,32,128,0,1,fp8,fp8,0,0.22454400857289633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,128,0,1,fp8,fp8,0,0.19473065932591757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,1,128,0,1,float16,fp8,0,0.21077332894007364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,128,0,1,fp8,fp8,0,0.197434663772583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,float16,0,0.20999467372894287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,2,128,0,1,float16,fp8,0,0.20972265799840292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,float16,0,0.21477333704630533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,128,0,1,float16,fp8,0,0.21674132347106934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,4,128,0,1,fp8,fp8,0,0.19771732886632284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,float16,0,0.2204213341077169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,128,0,1,fp8,fp8,0,0.20322666565577188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,32,8,128,0,1,float16,fp8,0,0.22527466217676798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,float16,0,0.14657066265741983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,128,0,1,float16,fp8,0,0.14818666378657022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,32,128,0,1,fp8,fp8,0,0.13409599661827087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,fp8,0,0.1234879990418752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,128,0,1,float16,float16,0,0.12398399909337361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,1,128,0,1,fp8,fp8,0,0.11166399717330933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,float16,0,0.12405866384506226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,128,0,1,float16,fp8,0,0.12404800454775493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,float16,0,0.12386666735013326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,2,128,0,1,fp8,fp8,0,0.11403733491897583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,128,0,1,float16,fp8,0,0.12470933794975281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,4,128,0,1,fp8,fp8,0,0.11594133575757344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,float16,0,0.12614400188128153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,128,0,1,float16,fp8,0,0.12610133488972983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,32,8,128,0,1,fp8,fp8,0,0.12168533603350322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,float16,0,0.08904533584912618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,128,0,1,float16,fp8,0,0.0906826655069987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,float16,0,0.08656533559163411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,32,128,0,1,fp8,fp8,0,0.08741866548856099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,128,0,1,float16,fp8,0,0.08695466319719951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,1,128,0,1,fp8,fp8,0,0.08071466783682506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,float16,0,0.08506666620572408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,128,0,1,float16,fp8,0,0.08726400136947632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,2,128,0,1,fp8,fp8,0,0.0819893330335617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,float16,0,0.08620267113049825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,128,0,1,float16,fp8,0,0.08588799834251404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,4,128,0,1,fp8,fp8,0,0.08108266691366832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,float16,0,0.08661333719889323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,128,0,1,fp8,fp8,0,0.08276799817879994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,32,8,128,0,1,float16,fp8,0,0.08716266353925069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,128,0,1,fp8,fp8,0,1.6945279439290364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,float16,0,1.8927253087361653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,1,128,0,1,float16,fp8,0,1.9103093147277832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,float16,0,1.907957394917806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,128,0,1,float16,fp8,0,1.92902406056722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,2,128,0,1,fp8,fp8,0,1.7074133555094402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,float16,0,1.9393706321716309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,128,0,1,float16,fp8,0,1.938170591990153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,4,128,0,1,fp8,fp8,0,1.7242239316304524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,float16,0,1.9844853083292644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,128,0,1,fp8,fp8,0,1.7629119555155437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,32,8,128,0,1,float16,fp8,0,2.0028692881266275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,float16,0,1.124293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,128,0,1,fp8,fp8,0,1.011957327524821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,float16,0,0.9518453280131022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,32,128,0,1,float16,fp8,0,1.1409706274668376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,128,0,1,float16,fp8,0,0.956378698348999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,1,128,0,1,fp8,fp8,0,0.8546613057454427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,float16,0,0.9577759901682535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,128,0,1,float16,fp8,0,0.9626133441925049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,2,128,0,1,fp8,fp8,0,0.8607306480407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,float16,0,0.9684906800587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,128,0,1,fp8,fp8,0,0.871071974436442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,4,128,0,1,float16,fp8,0,0.9796640078226725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,float16,0,1.0033653577168782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,128,0,1,float16,fp8,0,1.0116693178812664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,32,8,128,0,1,fp8,fp8,0,0.8940906524658203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,float16,0,0.5825706720352173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,128,0,1,float16,fp8,0,0.5909920136133829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,32,128,0,1,fp8,fp8,0,0.5254720052083334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,float16,0,0.49347201983133954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,128,0,1,fp8,fp8,0,0.44627734025319415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,1,128,0,1,float16,fp8,0,0.49565335114796955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,float16,0,0.49699199199676514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,128,0,1,float16,fp8,0,0.5001279910405477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,2,128,0,1,fp8,fp8,0,0.4490400155385335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,float16,0,0.5042186578114828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,128,0,1,fp8,fp8,0,0.45480533440907794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,4,128,0,1,float16,fp8,0,0.5072533289591471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,float16,0,0.5200106700261434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,128,0,1,float16,fp8,0,0.5241653521855673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,32,8,128,0,1,fp8,fp8,0,0.46670933564503986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,float16,0,0.31063999732335407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,float16,0,0.26440000534057617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,128,0,1,fp8,fp8,0,0.2845919926961263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,32,128,0,1,float16,fp8,0,0.31877867380777997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,128,0,1,float16,fp8,0,0.26612265904744464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,1,128,0,1,fp8,fp8,0,0.24288000663121542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,float16,0,0.2651519974072774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,128,0,1,float16,fp8,0,0.26789865891138714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,2,128,0,1,fp8,fp8,0,0.24362132946650186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,float16,0,0.2696213324864705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,128,0,1,float16,fp8,0,0.2723413308461507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,4,128,0,1,fp8,fp8,0,0.2468000054359436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,float16,0,0.2789493401845296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,128,0,1,fp8,fp8,0,0.2546773354212443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,32,8,128,0,1,float16,fp8,0,0.28353599707285565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,float16,0,0.1749173402786255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,float16,0,0.14542933305104574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,128,0,1,fp8,fp8,0,0.16156799594561258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,32,128,0,1,float16,fp8,0,0.18007999658584595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,128,0,1,float16,fp8,0,0.14574399590492249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,1,128,0,1,fp8,fp8,0,0.13802666465441385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,float16,0,0.1462559998035431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,128,0,1,fp8,fp8,0,0.1400266687075297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,float16,0,0.14863466223080954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,128,0,1,float16,fp8,0,0.14857600132624307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,2,128,0,1,float16,fp8,0,0.14788800477981567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,4,128,0,1,fp8,fp8,0,0.14083199699719748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,float16,0,0.1556426684061686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,128,0,1,float16,fp8,0,0.15569600462913513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,32,8,128,0,1,fp8,fp8,0,0.1464959979057312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,float16,0,0.1035146713256836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,128,0,1,float16,fp8,0,0.10474666953086853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,32,128,0,1,fp8,fp8,0,0.09936533371607463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,fp8,0,0.09072533249855042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,128,0,1,float16,float16,0,0.09098666906356812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,1,128,0,1,fp8,fp8,0,0.08344533046086629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,float16,0,0.08914666374524434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,128,0,1,float16,fp8,0,0.09102933605511983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,2,128,0,1,fp8,fp8,0,0.08319999774297078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,float16,0,0.09113066395123799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,128,0,1,fp8,fp8,0,0.08545066912968953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,4,128,0,1,float16,fp8,0,0.09139733513196309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,float16,0,0.09104532996813457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,128,0,1,float16,fp8,0,0.09287466605504353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,32,8,128,0,1,fp8,fp8,0,0.08692266543706258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,float16,0,0.06685866912206014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,128,0,1,float16,fp8,0,0.0689333329598109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,float16,0,0.0666240006685257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,32,128,0,1,fp8,fp8,0,0.06418666740258534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,128,0,1,float16,fp8,0,0.06620799998442332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,1,128,0,1,fp8,fp8,0,0.06234666705131531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,float16,0,0.06632533172766368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,128,0,1,float16,fp8,0,0.06433066725730896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,2,128,0,1,fp8,fp8,0,0.06264000137646993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,float16,0,0.06468266745408376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,128,0,1,float16,fp8,0,0.0658133327960968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,4,128,0,1,fp8,fp8,0,0.061893333991368614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,float16,0,0.06451199948787689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,128,0,1,float16,fp8,0,0.06643733382225037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,32,8,128,0,1,fp8,fp8,0,0.062218666076660156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,float16,0,2.046677271525065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,128,0,1,float16,fp8,0,2.0327839851379395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,1,128,0,1,fp8,fp8,0,1.9317919413248699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,float16,0,2.0941120783487954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,128,0,1,fp8,fp8,0,1.9572854042053223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,2,128,0,1,float16,fp8,0,2.063781261444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,float16,0,2.2451626459757485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,128,0,1,float16,fp8,0,2.22216002146403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,float16,0,2.2767626444498696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,4,128,0,1,fp8,fp8,0,2.4107413291931152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,128,0,1,float16,fp8,0,2.2379786173502603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,32,8,128,0,1,fp8,fp8,0,2.390069325764974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,float16,0,1.217098633448283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,128,0,1,float16,fp8,0,1.219109296798706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,32,128,0,1,fp8,fp8,0,1.1924586296081543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,float16,0,1.0326560338338215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,128,0,1,fp8,fp8,0,0.9741546312967936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,1,128,0,1,float16,fp8,0,1.024399995803833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,fp8,0,1.041050672531128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,128,0,1,float16,float16,0,1.0479946931203206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,2,128,0,1,fp8,fp8,0,0.9870879650115967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,float16,0,1.1034133434295654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,128,0,1,float16,fp8,0,1.0889920393625896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,4,128,0,1,fp8,fp8,0,1.1796053250630696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,float16,0,1.11736003557841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,128,0,1,float16,fp8,0,1.0956586996714275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,32,8,128,0,1,fp8,fp8,0,1.1465333302815754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,float16,0,0.6224799950917562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,128,0,1,float16,fp8,0,0.6156053145726522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,32,128,0,1,fp8,fp8,0,0.6021493275960287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,float16,0,0.529312014579773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,128,0,1,float16,fp8,0,0.5271893342336019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,1,128,0,1,fp8,fp8,0,0.49692801634470624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,float16,0,0.5375359853108724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,128,0,1,float16,fp8,0,0.5346666574478149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,2,128,0,1,fp8,fp8,0,0.5065173308054606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,float16,0,0.558789332707723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,128,0,1,float16,fp8,0,0.5551199913024902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,4,128,0,1,fp8,fp8,0,0.577733318010966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,float16,0,0.5661280155181885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,128,0,1,float16,fp8,0,0.5605706771214803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,32,8,128,0,1,fp8,fp8,0,0.5546186765034994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,float16,0,0.3245493372281392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,128,0,1,float16,fp8,0,0.31903467575709027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,32,128,0,1,fp8,fp8,0,0.3135733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,float16,0,0.27738134066263836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,128,0,1,float16,fp8,0,0.2749759952227275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,1,128,0,1,fp8,fp8,0,0.2588319977124532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,float16,0,0.2823999921480815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,128,0,1,float16,fp8,0,0.2791946729024251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,2,128,0,1,fp8,fp8,0,0.26581867535909015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,float16,0,0.2930506666501363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,128,0,1,float16,fp8,0,0.29130132993062335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,4,128,0,1,fp8,fp8,0,0.28755199909210205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,float16,0,0.2953866720199585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,128,0,1,float16,fp8,0,0.2942453424135844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,fp8,0,0.1744746764500936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,128,0,1,float16,float16,0,0.1768959959348043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,32,128,0,1,fp8,fp8,0,0.1673706571261088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,32,8,128,0,1,fp8,fp8,0,0.29053332408269245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,float16,0,0.14857600132624307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,128,0,1,float16,fp8,0,0.1474240024884542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,1,128,0,1,fp8,fp8,0,0.13684800267219543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,float16,0,0.1506666640440623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,128,0,1,float16,fp8,0,0.15060800313949585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,2,128,0,1,fp8,fp8,0,0.1423360009988149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,float16,0,0.15844266613324484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,128,0,1,float16,fp8,0,0.15787733594576517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,4,128,0,1,fp8,fp8,0,0.15217066804567972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,float16,0,0.1604693333307902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,128,0,1,float16,fp8,0,0.16078399618466696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,32,8,128,0,1,fp8,fp8,0,0.15530666708946228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,float16,0,0.10322667161623637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,128,0,1,float16,fp8,0,0.1016533374786377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,32,128,0,1,fp8,fp8,0,0.09630399942398071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,float16,0,0.0830026666323344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,128,0,1,float16,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,1,128,0,1,fp8,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,float16,0,0.08499733606974284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,128,0,1,float16,fp8,0,0.08335999647776286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,2,128,0,1,fp8,fp8,0,0.07669333120187123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,float16,0,0.08708799878756206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,128,0,1,float16,fp8,0,0.08601599931716919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,4,128,0,1,fp8,fp8,0,0.08497066299120586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,float16,0,0.08712533116340637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,128,0,1,float16,fp8,0,0.0869706670443217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,float16,0,0.05599466462930044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,32,8,128,0,1,fp8,fp8,0,0.08691733082135518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,128,0,1,float16,fp8,0,0.05436799923578898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,32,128,0,1,fp8,fp8,0,0.05563200016816457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,float16,0,0.0518506666024526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,128,0,1,float16,fp8,0,0.050527999798456825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,1,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,fp8,0,0.05180799961090088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,128,0,1,float16,float16,0,0.05213333169619242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,2,128,0,1,fp8,fp8,0,0.04774933556715647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,float16,0,0.05384533107280731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,128,0,1,float16,fp8,0,0.053946668903032936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,4,128,0,1,fp8,fp8,0,0.05203733344872793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,float16,0,0.05435733497142792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,128,0,1,fp8,fp8,0,0.051669334371884666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,32,8,128,0,1,float16,fp8,0,0.05351466437180837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,float16,0,0.03603200117746989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,128,0,1,float16,fp8,0,0.03623999903599421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,32,128,0,1,fp8,fp8,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,128,0,1,float16,fp8,0,0.03392533212900162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,1,128,0,1,fp8,fp8,0,0.033930666744709015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,float16,0,0.035743998984495796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,128,0,1,float16,fp8,0,0.03586666782697042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,2,128,0,1,fp8,fp8,0,0.03348266581694285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,float16,0,0.03585066646337509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,128,0,1,float16,fp8,0,0.03528533379236857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,4,128,0,1,fp8,fp8,0,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,float16,0,0.03594133257865906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,128,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,32,8,128,0,1,fp8,fp8,0,0.03389333436886469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,128,0,1,fp8,fp8,0,1.691941261291504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,128,0,1,float16,fp8,0,1.7730186780293782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,128,0,1,float16,float16,0,1.8228373527526855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,1,128,0,1,float16,float16,0,1.8075307210286458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,128,0,1,fp8,fp8,0,1.7214293479919434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,2,128,0,1,float16,fp8,0,1.7960586547851562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,128,0,1,float16,float16,0,1.9597066243489583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,128,0,1,float16,fp8,0,1.9443786938985188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,4,128,0,1,fp8,fp8,0,2.166986624399821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,128,0,1,float16,float16,0,1.9747254053751628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,128,0,1,float16,fp8,0,1.9403467178344727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,128,0,1,float16,float16,0,1.0668266614278157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,128,0,1,float16,fp8,0,1.074560006459554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,32,8,128,0,1,fp8,fp8,0,2.1613920529683432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,32,128,0,1,fp8,fp8,0,1.07205335299174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,128,0,1,float16,float16,0,0.9008320172627767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,128,0,1,float16,fp8,0,0.8942826588948568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,1,128,0,1,fp8,fp8,0,0.8541653156280518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,128,0,1,float16,float16,0,0.9143839677174886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,128,0,1,float16,fp8,0,0.9064053694407145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,2,128,0,1,fp8,fp8,0,0.8681440353393555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,128,0,1,float16,float16,0,0.9643466472625732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,128,0,1,float16,fp8,0,0.9468692938486735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,128,0,1,float16,float16,0,0.9669280052185059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,4,128,0,1,fp8,fp8,0,1.0733119646708171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,128,0,1,float16,fp8,0,0.9497120380401611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,128,0,1,float16,float16,0,0.542853315671285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,128,0,1,float16,fp8,0,0.5375200112660726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,32,128,0,1,fp8,fp8,0,0.5386079947153727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,32,8,128,0,1,fp8,fp8,0,1.0288639863332112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,128,0,1,float16,float16,0,0.4614666700363159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,128,0,1,float16,fp8,0,0.459663987159729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,1,128,0,1,fp8,fp8,0,0.43567999203999835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,128,0,1,float16,float16,0,0.4697653452555339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,128,0,1,float16,fp8,0,0.4659946759541829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,2,128,0,1,fp8,fp8,0,0.445685346921285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,128,0,1,float16,float16,0,0.487498680750529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,128,0,1,float16,fp8,0,0.48398931821187335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,4,128,0,1,fp8,fp8,0,0.5183200041453043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,128,0,1,float16,float16,0,0.49192531903584796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,128,0,1,float16,fp8,0,0.48484798272450763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,128,0,1,float16,float16,0,0.28401599327723187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,128,0,1,float16,fp8,0,0.27847466866175336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,32,8,128,0,1,fp8,fp8,0,0.5221226612726847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,32,128,0,1,fp8,fp8,0,0.2810720006624858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,128,0,1,float16,float16,0,0.2429706652959188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,128,0,1,float16,fp8,0,0.24012800057729086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,1,128,0,1,fp8,fp8,0,0.22563733657201132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,128,0,1,float16,float16,0,0.24549333254496256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,128,0,1,float16,fp8,0,0.2446933388710022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,2,128,0,1,fp8,fp8,0,0.23123733202616373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,128,0,1,float16,float16,0,0.25469332933425903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,128,0,1,float16,fp8,0,0.2529226740201314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,128,0,1,float16,float16,0,0.25708266099294025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,4,128,0,1,fp8,fp8,0,0.2542133331298828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,128,0,1,float16,fp8,0,0.25434666872024536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,128,0,1,float16,float16,0,0.15435199936230978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,128,0,1,float16,fp8,0,0.15253333250681558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,32,8,128,0,1,fp8,fp8,0,0.256602664788564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,32,128,0,1,fp8,fp8,0,0.14989866813023886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,128,0,1,float16,float16,0,0.12994133432706198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,128,0,1,float16,fp8,0,0.128602663675944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,1,128,0,1,fp8,fp8,0,0.12026133139928182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,128,0,1,float16,float16,0,0.13176533579826355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,128,0,1,float16,fp8,0,0.13050666451454163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,128,0,1,float16,float16,0,0.1369493305683136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,2,128,0,1,fp8,fp8,0,0.12504000465075174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,128,0,1,float16,fp8,0,0.13616533080736795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,128,0,1,float16,float16,0,0.13829333583513895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,4,128,0,1,fp8,fp8,0,0.13368533054987589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,128,0,1,float16,fp8,0,0.13834133744239807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,32,8,128,0,1,fp8,fp8,0,0.1370560030142466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,128,0,1,float16,float16,0,0.08938666184743245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,128,0,1,float16,fp8,0,0.08789333701133728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,32,128,0,1,fp8,fp8,0,0.08629332979520161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,128,0,1,float16,float16,0,0.07287999987602234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,128,0,1,float16,fp8,0,0.07234666744867961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,1,128,0,1,fp8,fp8,0,0.06851733227570851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,128,0,1,float16,float16,0,0.0728000005086263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,128,0,1,float16,fp8,0,0.07238399982452393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,2,128,0,1,fp8,fp8,0,0.06750399867693584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,128,0,1,float16,float16,0,0.07543466488520305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,128,0,1,float16,fp8,0,0.07512533167997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,4,128,0,1,fp8,fp8,0,0.07451733450094859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,128,0,1,float16,float16,0,0.0767626663049062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,128,0,1,float16,fp8,0,0.07474133372306824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,128,0,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,128,0,1,float16,float16,0,0.047637333472569786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,32,8,128,0,1,fp8,fp8,0,0.07594666878382365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,32,128,0,1,fp8,fp8,0,0.04775466521581014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,128,0,1,float16,float16,0,0.044266665975252785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,128,0,1,float16,fp8,0,0.043696001172065735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,1,128,0,1,fp8,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,128,0,1,float16,float16,0,0.04389866689840952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,128,0,1,float16,fp8,0,0.04479999840259552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,2,128,0,1,fp8,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,128,0,1,float16,float16,0,0.04692266881465912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,128,0,1,float16,fp8,0,0.045797333121299744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,4,128,0,1,fp8,fp8,0,0.044250667095184326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,128,0,1,float16,float16,0,0.04603200157483419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,128,0,1,float16,float16,0,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,128,0,1,float16,fp8,0,0.04663466910521189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,32,8,128,0,1,fp8,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,128,0,1,float16,fp8,0,0.03145600110292435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,32,128,0,1,fp8,fp8,0,0.03129599988460541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,128,0,1,float16,fp8,0,0.02924266705910365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,128,0,1,float16,float16,0,0.03010133405526479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,1,128,0,1,fp8,fp8,0,0.029264000554879505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,128,0,1,float16,fp8,0,0.030293333033720653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,2,128,0,1,fp8,fp8,0,0.02805333336194356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,128,0,1,float16,float16,0,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,128,0,1,float16,fp8,0,0.030271999537944794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,4,128,0,1,fp8,fp8,0,0.02935466667016347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,128,0,1,float16,float16,0,0.029445332785447437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,128,0,1,float16,fp8,0,0.029872000217437744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,32,8,128,0,1,fp8,fp8,0,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,128,0,1,float16,float16,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,128,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,32,128,0,1,float16,fp8,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,128,0,1,float16,float16,0,0.025221332907676697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,128,0,1,float16,fp8,0,0.025514667232831318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,1,128,0,1,fp8,fp8,0,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,128,0,1,float16,float16,0,0.02418133368094762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,2,128,0,1,fp8,fp8,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,128,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,128,0,1,float16,fp8,0,0.025125332176685333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,4,128,0,1,fp8,fp8,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,128,0,1,float16,float16,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,128,0,1,float16,fp8,0,0.025397333006064098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,32,8,128,0,1,fp8,fp8,0,0.02386133372783661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,128,0,1,float16,fp8,0,0.8260160287221273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,128,0,1,float16,float16,0,0.8330506483713785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,1,128,0,1,fp8,fp8,0,0.7924959659576416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,128,0,1,fp8,fp8,0,0.808895985285441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,128,0,1,float16,float16,0,0.8431200186411539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,2,128,0,1,float16,fp8,0,0.8364906311035156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,128,0,1,float16,fp8,0,0.8804480234781901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,128,0,1,float16,float16,0,0.8980639775594076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,4,128,0,1,fp8,fp8,0,1.0068106651306152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,128,0,1,float16,fp8,0,0.8753706614176432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,128,0,1,float16,float16,0,0.8950560092926025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,32,8,128,0,1,fp8,fp8,0,1.0045173168182373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,128,0,1,float16,fp8,0,0.4947306712468465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,128,0,1,fp8,fp8,0,0.5038613478342692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,32,128,0,1,float16,float16,0,0.5049973328908285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,128,0,1,float16,float16,0,0.4261600176493327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,128,0,1,float16,fp8,0,0.4225813150405884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,1,128,0,1,fp8,fp8,0,0.40372800827026367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,128,0,1,float16,fp8,0,0.428111990292867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,128,0,1,float16,float16,0,0.43068798383076984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,2,128,0,1,fp8,fp8,0,0.4124639828999837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,128,0,1,float16,float16,0,0.44727468490600586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,128,0,1,float16,fp8,0,0.4435733159383138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,4,128,0,1,fp8,fp8,0,0.48923198382059735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,128,0,1,float16,float16,0,0.4512159824371338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,128,0,1,float16,fp8,0,0.44576001167297363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,128,0,1,float16,float16,0,0.26707732677459717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,32,8,128,0,1,fp8,fp8,0,0.49409600098927814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,128,0,1,float16,fp8,0,0.261407991250356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,128,0,1,float16,float16,0,0.22378132740656534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,32,128,0,1,fp8,fp8,0,0.2634506622950236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,128,0,1,float16,fp8,0,0.2227519949277242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,1,128,0,1,fp8,fp8,0,0.21010667085647583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,128,0,1,float16,float16,0,0.22751466433207193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,128,0,1,float16,fp8,0,0.22595733404159546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,2,128,0,1,fp8,fp8,0,0.21649599075317383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,128,0,1,float16,fp8,0,0.23374400536219278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,128,0,1,float16,float16,0,0.23493866125742593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,4,128,0,1,fp8,fp8,0,0.23893866936365762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,128,0,1,float16,float16,0,0.23833600680033365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,128,0,1,float16,fp8,0,0.23493333657582602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,128,0,1,float16,float16,0,0.1441439986228943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,32,8,128,0,1,fp8,fp8,0,0.24098666508992514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,128,0,1,float16,fp8,0,0.14144532879193625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,32,128,0,1,fp8,fp8,0,0.14230400323867798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,128,0,1,float16,float16,0,0.11793599526087443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,128,0,1,float16,fp8,0,0.11583466331164043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,1,128,0,1,fp8,fp8,0,0.1109226644039154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,128,0,1,float16,float16,0,0.11959999799728394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,128,0,1,float16,fp8,0,0.11921599507331848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,2,128,0,1,fp8,fp8,0,0.11422933141390483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,128,0,1,float16,float16,0,0.12595199545224509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,128,0,1,float16,fp8,0,0.12597866853078207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,4,128,0,1,fp8,fp8,0,0.12653332948684692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,128,0,1,float16,float16,0,0.12821867068608603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,128,0,1,float16,fp8,0,0.12813867131868997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,32,8,128,0,1,fp8,fp8,0,0.12873066465059915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,128,0,1,float16,float16,0,0.08427733182907104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,128,0,1,float16,fp8,0,0.08230400085449219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,32,128,0,1,fp8,fp8,0,0.08111999928951263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,128,0,1,float16,float16,0,0.06840533514817555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,128,0,1,float16,fp8,0,0.06678933401902516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,1,128,0,1,fp8,fp8,0,0.06253333389759064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,128,0,1,float16,float16,0,0.06700799862543742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,128,0,1,float16,fp8,0,0.06820799907048543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,2,128,0,1,fp8,fp8,0,0.06243733565012614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,128,0,1,float16,float16,0,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,128,0,1,float16,fp8,0,0.07000533243020375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,4,128,0,1,fp8,fp8,0,0.06875733534495036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,128,0,1,float16,float16,0,0.07092800239721934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,128,0,1,float16,fp8,0,0.07031466563542683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,128,0,1,float16,float16,0,0.04208533465862274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,32,8,128,0,1,fp8,fp8,0,0.07270933190981548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,128,0,1,float16,float16,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,128,0,1,float16,fp8,0,0.042634665966033936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,32,128,0,1,fp8,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,128,0,1,float16,fp8,0,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,1,128,0,1,fp8,fp8,0,0.037834666669368744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,128,0,1,float16,float16,0,0.03972800076007843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,128,0,1,float16,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,2,128,0,1,fp8,fp8,0,0.0378506655494372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,128,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,128,0,1,float16,fp8,0,0.04181866844495138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,4,128,0,1,fp8,fp8,0,0.04167466859022776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,128,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,128,0,1,float16,fp8,0,0.041450666884581246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,128,0,1,float16,float16,0,0.02959466725587845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,32,8,128,0,1,fp8,fp8,0,0.04234133164087931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,128,0,1,float16,fp8,0,0.02914133419593175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,32,128,0,1,fp8,fp8,0,0.028325334191322327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,128,0,1,float16,float16,0,0.027866666515668232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,128,0,1,float16,fp8,0,0.027802666028340656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,128,0,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,1,128,0,1,fp8,fp8,0,0.02552533398071925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,128,0,1,float16,float16,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,2,128,0,1,fp8,fp8,0,0.026517334083716076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,128,0,1,float16,float16,0,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,128,0,1,float16,fp8,0,0.027589333554108936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,4,128,0,1,fp8,fp8,0,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,128,0,1,float16,fp8,0,0.02755733331044515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,32,8,128,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,128,0,1,float16,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,128,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,32,128,0,1,fp8,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,128,0,1,float16,float16,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,128,0,1,fp8,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,1,128,0,1,float16,fp8,0,0.023541333774725597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,128,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,2,128,0,1,fp8,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,128,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,128,0,1,float16,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,4,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,128,0,1,float16,fp8,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,32,8,128,0,1,fp8,fp8,0,0.021712000171343487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,128,0,1,float16,float16,0,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,128,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,32,128,0,1,fp8,fp8,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,128,0,1,float16,float16,0,0.02091199904680252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,1,128,0,1,fp8,fp8,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,128,0,1,float16,float16,0,0.019679999599854153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,128,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,128,0,1,float16,float16,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,2,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,4,128,0,1,fp8,fp8,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,128,0,1,float16,fp8,0,0.019621333728233974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,32,8,128,0,1,fp8,fp8,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,128,0,1,float16,float16,0,0.456767996152242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,128,0,1,fp8,fp8,0,0.43534934520721436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,1,128,0,1,float16,fp8,0,0.4503626823425293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,128,0,1,float16,float16,0,0.4684586524963379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,128,0,1,float16,fp8,0,0.46124267578125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,2,128,0,1,fp8,fp8,0,0.4457919994990031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,128,0,1,float16,fp8,0,0.48233067989349365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,128,0,1,float16,float16,0,0.48762667179107666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,4,128,0,1,fp8,fp8,0,0.5135466655095419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,128,0,1,float16,float16,0,0.4887946844100952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,128,0,1,float16,fp8,0,0.4819519917170207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,32,8,128,0,1,fp8,fp8,0,0.495685338973999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,128,0,1,float16,fp8,0,0.268559992313385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,128,0,1,float16,float16,0,0.27491732438405353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,32,128,0,1,fp8,fp8,0,0.2767840027809143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,128,0,1,float16,float16,0,0.23691733678181967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,128,0,1,float16,fp8,0,0.23292799790700278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,1,128,0,1,fp8,fp8,0,0.2238346735636393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,128,0,1,float16,fp8,0,0.24006932973861694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,128,0,1,float16,float16,0,0.24437334140141806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,2,128,0,1,fp8,fp8,0,0.23051732778549194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,128,0,1,float16,float16,0,0.2513013283411662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,128,0,1,float16,fp8,0,0.2502560019493103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,4,128,0,1,fp8,fp8,0,0.2521760066350301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,128,0,1,float16,float16,0,0.25246934096018475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,128,0,1,float16,fp8,0,0.24977066119511923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,32,8,128,0,1,fp8,fp8,0,0.24493332703908285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,128,0,1,float16,float16,0,0.14729066689809164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,128,0,1,float16,fp8,0,0.14643733700116476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,32,128,0,1,fp8,fp8,0,0.14670399824778238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,128,0,1,float16,float16,0,0.12607466181119284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,128,0,1,fp8,fp8,0,0.11936533451080322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,1,128,0,1,float16,fp8,0,0.12571733196576437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,128,0,1,float16,float16,0,0.1301653285821279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,128,0,1,float16,fp8,0,0.1292586624622345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,2,128,0,1,fp8,fp8,0,0.12350400288899739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,128,0,1,float16,float16,0,0.1346560021241506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,128,0,1,float16,fp8,0,0.1346560021241506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,4,128,0,1,fp8,fp8,0,0.13384532928466797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,128,0,1,float16,float16,0,0.1357599993546804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,128,0,1,float16,float16,0,0.08256533245245616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,128,0,1,float16,fp8,0,0.13478933771451315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,128,0,1,float16,fp8,0,0.08303999900817871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,32,8,128,0,1,fp8,fp8,0,0.13590400417645773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,32,128,0,1,fp8,fp8,0,0.08295999964078267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,128,0,1,float16,fp8,0,0.06857066849867503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,128,0,1,float16,float16,0,0.06850133339564006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,1,128,0,1,fp8,fp8,0,0.0643039991458257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,128,0,1,float16,float16,0,0.06828799843788147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,128,0,1,float16,fp8,0,0.07083733379840851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,2,128,0,1,fp8,fp8,0,0.06465066472689311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,128,0,1,float16,float16,0,0.07275733351707458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,128,0,1,float16,fp8,0,0.07307200133800507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,4,128,0,1,fp8,fp8,0,0.07111999889214833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,128,0,1,float16,float16,0,0.07277333239714305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,128,0,1,float16,fp8,0,0.07267733414967854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,32,8,128,0,1,fp8,fp8,0,0.07400533556938171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,128,0,1,float16,float16,0,0.04570133487383524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,128,0,1,fp8,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,32,128,0,1,float16,fp8,0,0.04552533229192098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,128,0,1,float16,float16,0,0.042090664307276406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,128,0,1,float16,fp8,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,1,128,0,1,fp8,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,128,0,1,float16,float16,0,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,128,0,1,float16,fp8,0,0.043791999419530235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,2,128,0,1,fp8,fp8,0,0.03997866561015447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,128,0,1,float16,float16,0,0.04408533374468485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,128,0,1,float16,fp8,0,0.04388799766699473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,4,128,0,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,128,0,1,float16,float16,0,0.043824002146720886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,128,0,1,float16,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,32,8,128,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,128,0,1,float16,fp8,0,0.030432000756263733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,128,0,1,float16,float16,0,0.02977599948644638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,128,0,1,float16,float16,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,32,128,0,1,fp8,fp8,0,0.029887999097506206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,128,0,1,float16,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,1,128,0,1,fp8,fp8,0,0.0272533322374026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,128,0,1,float16,fp8,0,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,2,128,0,1,fp8,fp8,0,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,128,0,1,float16,float16,0,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,128,0,1,float16,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,4,128,0,1,fp8,fp8,0,0.029552000264326733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,128,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,128,0,1,float16,fp8,0,0.030773334205150604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,32,8,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,128,0,1,float16,fp8,0,0.021669333179791767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,32,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,128,0,1,float16,float16,0,0.02096533278624217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,128,0,1,float16,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,1,128,0,1,fp8,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,128,0,1,float16,float16,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,2,128,0,1,float16,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,128,0,1,float16,float16,0,0.02011200040578842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,4,128,0,1,fp8,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,128,0,1,float16,float16,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,32,8,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,32,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,128,0,1,float16,float16,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,128,0,1,float16,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,128,0,1,float16,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,2,128,0,1,fp8,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,128,0,1,float16,fp8,0,0.017504000415404636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,128,0,1,float16,float16,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,4,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,32,8,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,128,0,1,float16,fp8,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,32,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,128,0,1,float16,float16,0,0.01666133354107539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,1,128,0,1,float16,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,128,0,1,float16,float16,0,0.016469333320856094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,2,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,128,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,32,8,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,128,0,1,float16,float16,0,0.2960960070292155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,128,0,1,float16,fp8,0,0.29526933034261066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,1,128,0,1,fp8,fp8,0,0.2818506757418315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,128,0,1,float16,float16,0,0.30156266689300537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,128,0,1,fp8,fp8,0,0.2834560076395671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,2,128,0,1,float16,fp8,0,0.29709867636362713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,128,0,1,float16,float16,0,0.31454400221506756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,128,0,1,float16,fp8,0,0.30955733855565387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,4,128,0,1,fp8,fp8,0,0.3026026686032613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,128,0,1,float16,float16,0,0.31648000081380206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,128,0,1,float16,fp8,0,0.30938132603963214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,32,8,128,0,1,fp8,fp8,0,0.30298133691151935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,128,0,1,float16,float16,0,0.17627733945846558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,128,0,1,float16,fp8,0,0.17270400126775107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,32,128,0,1,fp8,fp8,0,0.17384000619252524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,128,0,1,float16,float16,0,0.15637866655985513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,128,0,1,float16,fp8,0,0.15465600291887918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,1,128,0,1,fp8,fp8,0,0.14840533336003622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,128,0,1,float16,float16,0,0.15851733088493347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,128,0,1,float16,fp8,0,0.15634666879971823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,2,128,0,1,fp8,fp8,0,0.14919466773668924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,128,0,1,float16,float16,0,0.16456533471743265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,128,0,1,float16,fp8,0,0.16289599736531576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,4,128,0,1,fp8,fp8,0,0.16012799739837646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,128,0,1,float16,float16,0,0.16516799728075662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,128,0,1,float16,fp8,0,0.16409066319465637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,128,0,1,float16,float16,0,0.09744000434875488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,128,0,1,float16,fp8,0,0.09523199995358785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,32,8,128,0,1,fp8,fp8,0,0.1585706671079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,32,128,0,1,fp8,fp8,0,0.09753066301345825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,128,0,1,float16,float16,0,0.0851200024286906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,128,0,1,float16,fp8,0,0.08508267005284627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,128,0,1,float16,float16,0,0.08680533369382222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,1,128,0,1,fp8,fp8,0,0.07877333462238312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,128,0,1,float16,fp8,0,0.08505599697430928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,2,128,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,128,0,1,float16,float16,0,0.08939733107884724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,128,0,1,float16,fp8,0,0.08929066856702168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,4,128,0,1,fp8,fp8,0,0.08488532900810242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,128,0,1,float16,float16,0,0.09006399909655254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,128,0,1,float16,fp8,0,0.0909493366877238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,32,8,128,0,1,fp8,fp8,0,0.08898133039474487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,128,0,1,float16,float16,0,0.05243200063705444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,128,0,1,float16,fp8,0,0.052341332038243614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,128,0,1,float16,float16,0,0.04985066751639048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,32,128,0,1,fp8,fp8,0,0.0524586687485377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,128,0,1,float16,fp8,0,0.04839999973773956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,1,128,0,1,fp8,fp8,0,0.04609066744645437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,128,0,1,float16,float16,0,0.04946133494377136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,128,0,1,float16,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,128,0,1,float16,float16,0,0.05188799897829691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,2,128,0,1,fp8,fp8,0,0.04789866507053375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,128,0,1,float16,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,4,128,0,1,fp8,fp8,0,0.05017599960168203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,128,0,1,float16,float16,0,0.05231999854246775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,128,0,1,float16,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,128,0,1,float16,float16,0,0.03403199960788091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,128,0,1,float16,fp8,0,0.03572266548871994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,32,8,128,0,1,fp8,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,32,128,0,1,fp8,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,128,0,1,float16,float16,0,0.03392533212900162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,128,0,1,float16,fp8,0,0.033376000821590424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,1,128,0,1,fp8,fp8,0,0.031658666829268135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,128,0,1,float16,float16,0,0.0335359995563825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,128,0,1,float16,fp8,0,0.03385066737731298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,2,128,0,1,fp8,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,128,0,1,float16,float16,0,0.03347733368476232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,4,128,0,1,fp8,fp8,0,0.033301333586374916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,128,0,1,float16,float16,0,0.035173334181308746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,128,0,1,float16,fp8,0,0.03372266640265783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,128,0,1,float16,float16,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,128,0,1,float16,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,32,8,128,0,1,fp8,fp8,0,0.03323733309904734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,32,128,0,1,fp8,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,128,0,1,float16,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,1,128,0,1,fp8,fp8,0,0.023018665611743927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,128,0,1,float16,float16,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,128,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,2,128,0,1,fp8,fp8,0,0.02313599983851115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,128,0,1,float16,float16,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,128,0,1,float16,fp8,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,4,128,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,128,0,1,float16,float16,0,0.023717333873112995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,128,0,1,float16,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,32,8,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,128,0,1,float16,float16,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,32,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,128,0,1,float16,float16,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,1,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,128,0,1,float16,fp8,0,0.017909333109855652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,4,128,0,1,fp8,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,128,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,32,8,128,0,1,fp8,fp8,0,0.017632000148296356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,128,0,1,float16,float16,0,0.016794666647911072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,32,128,0,1,fp8,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,128,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,2,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,128,0,1,float16,fp8,0,0.01628799984852473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,4,128,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,128,0,1,float16,float16,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,128,0,1,float16,fp8,0,0.016496000190575916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,32,8,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,128,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,32,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,128,0,1,float16,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,4,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,32,8,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,128,0,1,float16,float16,0,0.2182826598485311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,128,0,1,float16,fp8,0,0.21786133448282877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,1,128,0,1,fp8,fp8,0,0.20995734135309854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,128,0,1,float16,float16,0,0.22054932514826456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,128,0,1,float16,fp8,0,0.21799999475479126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,2,128,0,1,fp8,fp8,0,0.21037334203720093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,128,0,1,float16,float16,0,0.22587732474009195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,128,0,1,float16,fp8,0,0.22474133968353271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,4,128,0,1,fp8,fp8,0,0.22019733985265097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,128,0,1,float16,fp8,0,0.22595733404159546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,128,0,1,float16,float16,0,0.22447999318440756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,32,8,128,0,1,fp8,fp8,0,0.2205173373222351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,128,0,1,float16,float16,0,0.12634133299191794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,128,0,1,float16,fp8,0,0.12665599584579468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,32,128,0,1,fp8,fp8,0,0.12626666824022928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,128,0,1,float16,float16,0,0.11665599544843037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,128,0,1,float16,fp8,0,0.11582400401433308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,1,128,0,1,fp8,fp8,0,0.10949333508809407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,128,0,1,float16,float16,0,0.11779733498891194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,128,0,1,float16,fp8,0,0.11575999855995178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,2,128,0,1,fp8,fp8,0,0.10966933767000835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,128,0,1,float16,float16,0,0.12036800384521484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,128,0,1,float16,fp8,0,0.11982933680216472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,4,128,0,1,fp8,fp8,0,0.11588266491889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,128,0,1,float16,float16,0,0.12057066957155864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,128,0,1,float16,fp8,0,0.12184000015258789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,128,0,1,float16,float16,0,0.06838933130105336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,32,8,128,0,1,fp8,fp8,0,0.11833600203196208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,128,0,1,float16,fp8,0,0.06808533271153767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,32,128,0,1,fp8,fp8,0,0.06854400038719177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,128,0,1,float16,float16,0,0.06576000154018402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,128,0,1,float16,fp8,0,0.06659733255704244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,1,128,0,1,fp8,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,128,0,1,float16,float16,0,0.06637333333492279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,128,0,1,fp8,fp8,0,0.06251200040181477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,2,128,0,1,float16,fp8,0,0.06487466891606648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,128,0,1,float16,float16,0,0.06656000018119812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,128,0,1,float16,fp8,0,0.06804266571998596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,4,128,0,1,fp8,fp8,0,0.06417599817117055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,128,0,1,float16,float16,0,0.06842666864395142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,128,0,1,float16,fp8,0,0.06658133367697398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,32,8,128,0,1,fp8,fp8,0,0.06449066599210103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,128,0,1,float16,float16,0,0.04205866654713949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,128,0,1,float16,float16,0,0.03997333347797394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,128,0,1,float16,fp8,0,0.040175999204317726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,32,128,0,1,fp8,fp8,0,0.04080000023047129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,1,128,0,1,fp8,fp8,0,0.03807999938726425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,128,0,1,float16,float16,0,0.04055999964475632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,128,0,1,float16,fp8,0,0.041189332803090416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,128,0,1,float16,float16,0,0.04246933261553446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,2,128,0,1,fp8,fp8,0,0.03834133346875509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,128,0,1,float16,float16,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,4,128,0,1,fp8,fp8,0,0.039808000127474465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,128,0,1,fp8,fp8,0,0.03973866750796636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,128,0,1,float16,float16,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,32,8,128,0,1,float16,fp8,0,0.04119999955097834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,128,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,128,0,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,32,128,0,1,fp8,fp8,0,0.027493332823117573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,1,128,0,1,float16,fp8,0,0.028912000358104706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,128,0,1,float16,float16,0,0.027952000498771667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,128,0,1,float16,fp8,0,0.02759466568628947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,2,128,0,1,fp8,fp8,0,0.02586666742960612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,128,0,1,float16,fp8,0,0.027653334041436512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,128,0,1,float16,float16,0,0.0301706666747729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,4,128,0,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,128,0,1,float16,fp8,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,32,8,128,0,1,fp8,fp8,0,0.02717866748571396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,128,0,1,float16,float16,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,128,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,32,128,0,1,fp8,fp8,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,128,0,1,float16,float16,0,0.01966399947802226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,2,128,0,1,fp8,fp8,0,0.018842666099468868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,128,0,1,float16,float16,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,128,0,1,float16,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,4,128,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,128,0,1,float16,fp8,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,32,8,128,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,32,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,1,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,2,128,0,1,float16,fp8,0,0.016688000410795212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,128,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,4,128,0,1,fp8,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,32,8,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,32,128,0,1,fp8,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,128,0,1,float16,float16,0,0.015509333461523056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,1,128,0,1,fp8,fp8,0,0.015749332805474598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,128,0,1,float16,fp8,0,0.016751999656359356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,2,128,0,1,fp8,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,128,0,1,float16,float16,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,4,128,0,1,fp8,fp8,0,0.016095999628305435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,32,8,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,32,128,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,128,0,1,fp8,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,1,128,0,1,float16,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,128,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,4,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,128,0,1,float16,float16,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,32,8,128,0,1,fp8,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,128,0,1,float16,float16,0,0.18162665764490762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,128,0,1,float16,fp8,0,0.18138132492701212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,1,128,0,1,fp8,fp8,0,0.1711519956588745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,128,0,1,float16,fp8,0,0.17939200003941855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,128,0,1,float16,float16,0,0.1827039917310079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,2,128,0,1,fp8,fp8,0,0.17179733514785767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,128,0,1,float16,float16,0,0.18331732352574667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,128,0,1,float16,fp8,0,0.18317866325378418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,4,128,0,1,fp8,fp8,0,0.17832533518473306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,128,0,1,float16,float16,0,0.18488534291585287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,128,0,1,float16,fp8,0,0.1846239964167277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,32,8,128,0,1,fp8,fp8,0,0.17974932988484701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,128,0,1,float16,float16,0,0.10087999701499939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,128,0,1,float16,fp8,0,0.10006933410962422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,32,128,0,1,fp8,fp8,0,0.10130133231480916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,128,0,1,float16,float16,0,0.09710400303204854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,128,0,1,float16,fp8,0,0.09733333190282185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,1,128,0,1,fp8,fp8,0,0.09315199653307597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,128,0,1,float16,float16,0,0.0969546635945638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,128,0,1,float16,fp8,0,0.09723200400670369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,2,128,0,1,fp8,fp8,0,0.09322667121887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,128,0,1,float16,float16,0,0.09738666812578838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,128,0,1,float16,fp8,0,0.09758933385213216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,4,128,0,1,fp8,fp8,0,0.09521599610646565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,128,0,1,float16,float16,0,0.0972053309281667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,128,0,1,float16,float16,0,0.05788266658782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,128,0,1,float16,fp8,0,0.0581226646900177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,128,0,1,fp8,fp8,0,0.09532800316810608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,32,8,128,0,1,float16,fp8,0,0.09861866633097331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,32,128,0,1,fp8,fp8,0,0.05630933245023092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,128,0,1,float16,float16,0,0.05779733260472616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,128,0,1,float16,fp8,0,0.05644799768924713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,1,128,0,1,fp8,fp8,0,0.05468266705671946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,128,0,1,float16,float16,0,0.056186666091283165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,128,0,1,float16,fp8,0,0.05638933181762695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,2,128,0,1,fp8,fp8,0,0.05397866666316986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,128,0,1,fp8,fp8,0,0.055946667989095054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,128,0,1,float16,fp8,0,0.05653866628805796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,4,128,0,1,float16,float16,0,0.058650667468706764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,128,0,1,float16,float16,0,0.058186665177345276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,128,0,1,float16,fp8,0,0.05806399881839752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,32,8,128,0,1,fp8,fp8,0,0.05635733405749003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,128,0,1,float16,fp8,0,0.035573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,128,0,1,float16,float16,0,0.03623466690381368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,32,128,0,1,fp8,fp8,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,128,0,1,float16,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,1,128,0,1,fp8,fp8,0,0.033813332517941795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,128,0,1,float16,float16,0,0.033674667278925575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,128,0,1,float16,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,2,128,0,1,fp8,fp8,0,0.03427733232577642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,128,0,1,float16,float16,0,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,128,0,1,float16,fp8,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,4,128,0,1,fp8,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,128,0,1,float16,float16,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,128,0,1,float16,fp8,0,0.03565333286921183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,128,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,32,8,128,0,1,fp8,fp8,0,0.0339626669883728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,128,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,32,128,0,1,fp8,fp8,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,128,0,1,float16,float16,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,128,0,1,float16,fp8,0,0.025263999899228413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,1,128,0,1,fp8,fp8,0,0.024058667321999867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,128,0,1,float16,fp8,0,0.023631999890009563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,128,0,1,float16,float16,0,0.025727999707063038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,2,128,0,1,fp8,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,128,0,1,float16,float16,0,0.02342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,128,0,1,float16,fp8,0,0.02437866727511088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,4,128,0,1,fp8,fp8,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,128,0,1,float16,float16,0,0.02365333338578542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,128,0,1,float16,fp8,0,0.024533333877722423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,32,8,128,0,1,fp8,fp8,0,0.023706667125225067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,128,0,1,float16,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,32,128,0,1,fp8,fp8,0,0.01882133384545644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,128,0,1,float16,float16,0,0.019082666685183842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,128,0,1,float16,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,1,128,0,1,fp8,fp8,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,128,0,1,float16,fp8,0,0.019834666202465694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,2,128,0,1,fp8,fp8,0,0.01933866615096728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,128,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,128,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,4,128,0,1,fp8,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,128,0,1,float16,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,32,8,128,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,32,128,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,1,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,2,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,128,0,1,float16,float16,0,0.016373333831628162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,128,0,1,float16,float16,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,32,8,128,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,32,128,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,1,128,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,128,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,2,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,4,128,0,1,fp8,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,128,0,1,float16,float16,0,0.015674666812022526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,128,0,1,float16,fp8,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,32,8,128,0,1,fp8,fp8,0,0.016672000288963318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,128,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,32,128,0,1,fp8,fp8,0,0.015685333559910457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,128,0,1,float16,float16,0,0.015498666713635126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,1,128,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,128,0,1,float16,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,2,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,4,128,0,1,fp8,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,32,8,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,0,0.1523360013961792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,0,0.15269866585731506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,1,128,0,1,fp8,fp8,0,0.13844799995422363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,128,0,1,fp8,fp8,0,0.14037866393725076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,0,0.15278933445612589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,0,0.1529759963353475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,0,0.15286399920781454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,0,0.15264000495274863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,4,128,0,1,fp8,fp8,0,0.140255997578303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,0,0.1528426706790924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,0,0.15331199765205383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,0,0.08288000027338664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,32,8,128,0,1,fp8,fp8,0,0.1400053302447001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,0,0.08296533425649007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,32,128,0,1,fp8,fp8,0,0.07723733286062877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,0,0.08299200236797333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,0,0.08270399769147237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,1,128,0,1,fp8,fp8,0,0.07718933125336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,0,0.08281599978605907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,0,0.0832586685816447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,2,128,0,1,fp8,fp8,0,0.07710933188597362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,0,0.08285866677761078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,0,0.08323200047016144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,4,128,0,1,fp8,fp8,0,0.0768746683994929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,0,0.08316266536712646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,0,0.08347200353940327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,32,8,128,0,1,fp8,fp8,0,0.07717866698900859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,0,0.04996266464392344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,0,0.05014933149019877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,32,128,0,1,fp8,fp8,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,0,0.04816000163555145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,0,0.05020800232887268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,1,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,0,0.04826666911443075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,0,0.050213331977526345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,2,128,0,1,fp8,fp8,0,0.04580266773700714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,0,0.04854933420817057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,0,0.0498879998922348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,4,128,0,1,fp8,fp8,0,0.04606399933497111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,0,0.047797332207361855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,0,0.04905066887537638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,32,8,128,0,1,fp8,fp8,0,0.04582933088143667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,0,0.03139200061559677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,0,0.03197866678237915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,32,128,0,1,fp8,fp8,0,0.03014933317899704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,0,0.032442666590213776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,1,128,0,1,fp8,fp8,0,0.029951999584833782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,0,0.03138133386770884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,0,0.03166933357715607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,2,128,0,1,fp8,fp8,0,0.031018666923046112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,0,0.03140799949566523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,0,0.03197333216667175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,4,128,0,1,fp8,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,0,0.03196266790231069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,0,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,32,8,128,0,1,fp8,fp8,0,0.02975466599067052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,0,0.023503998915354412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,32,128,0,1,fp8,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,0,0.023567999402681988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,1,128,0,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,0,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,2,128,0,1,fp8,fp8,0,0.023599999646345775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,0,0.023434666295846302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,4,128,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,0,0.02385066697994868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,32,8,128,0,1,fp8,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,0,0.021551998953024547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,32,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,0,0.019610666980346043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,1,128,0,1,fp8,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,0,0.01958400011062622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,2,128,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,4,128,0,1,fp8,fp8,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,128,0,1,fp8,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,0,0.02086399992307027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,32,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,1,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,2,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,0,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,128,0,1,fp8,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,0,0.018464000274737675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,32,8,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,0,0.016063999384641647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,0,0.016730666160583496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,32,8,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,0,0.01758933315674464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,32,128,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,1,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,0,0.015610666324694952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,0,0.016042667130629223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,2,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,32,8,128,0,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,128,0,1,fp8,fp8,0,6.5961761474609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,float16,0,8.648656209309896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,1,128,0,1,float16,fp8,0,8.566485087076822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,float16,0,8.866975784301758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,128,0,1,float16,fp8,0,8.409562428792318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,2,128,0,1,fp8,fp8,0,6.718202590942383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,float16,0,8.564858754475912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,128,0,1,fp8,fp8,0,6.732959747314453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,4,128,0,1,float16,fp8,0,8.7706667582194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,float16,0,9.419674555460611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,128,0,1,float16,fp8,0,9.30622927347819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,float16,0,4.581136067708333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,128,0,1,float16,fp8,0,4.563594818115234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,24,8,128,0,1,fp8,fp8,0,6.7807572682698565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,24,128,0,1,fp8,fp8,0,3.550688107808431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,float16,0,4.287018775939941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,128,0,1,fp8,fp8,0,3.392319997151693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,1,128,0,1,float16,fp8,0,4.452639897664388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,float16,0,4.257280031840007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,128,0,1,fp8,fp8,0,3.40281613667806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,2,128,0,1,float16,fp8,0,4.239450772603353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,float16,0,4.456272125244141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,128,0,1,float16,fp8,0,4.376234690348308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,4,128,0,1,fp8,fp8,0,3.4128640492757163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,float16,0,4.5164534250895185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,float16,0,2.245680014292399
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,128,0,1,float16,fp8,0,4.3723039627075195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,24,8,128,0,1,fp8,fp8,0,3.4394025802612305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,128,0,1,float16,fp8,0,2.326042652130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,24,128,0,1,fp8,fp8,0,1.8893973032633464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,fp8,0,2.1584107081095376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,128,0,1,float16,float16,0,2.1762773195902505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,1,128,0,1,fp8,fp8,0,1.85153595606486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,128,0,1,fp8,fp8,0,1.806671937306722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,float16,0,2.1821386019388833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,2,128,0,1,float16,fp8,0,2.214896043141683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,float16,0,2.1817866961161294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,128,0,1,fp8,fp8,0,1.8101545969645183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,4,128,0,1,float16,fp8,0,2.1946560541788735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,float16,0,2.242245356241862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,128,0,1,float16,fp8,0,2.2365333239237466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,float16,0,1.2687893708546956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,128,0,1,float16,fp8,0,1.2827359835306804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,24,8,128,0,1,fp8,fp8,0,1.832490603129069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,24,128,0,1,fp8,fp8,0,1.0491306781768799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,float16,0,1.1786399682362874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,128,0,1,fp8,fp8,0,1.0069493452707927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,1,128,0,1,float16,fp8,0,1.1836586793263753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,float16,0,1.192197322845459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,128,0,1,fp8,fp8,0,1.012506643931071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,2,128,0,1,float16,fp8,0,1.1927893161773682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,float16,0,1.1865066687266033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,128,0,1,float16,fp8,0,1.2155306339263916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,4,128,0,1,fp8,fp8,0,1.0163893699645996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,float16,0,1.2042400042215984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,128,0,1,float16,fp8,0,1.1997653643290203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,24,8,128,0,1,fp8,fp8,0,1.0458347002665203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,128,0,1,fp8,fp8,0,3.98909854888916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,float16,0,4.980101267496745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,1,128,0,1,float16,fp8,0,5.068319956461589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,float16,0,4.997498512268066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,128,0,1,fp8,fp8,0,4.008506774902344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,2,128,0,1,float16,fp8,0,5.044447898864746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,float16,0,5.077413241068522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,128,0,1,float16,fp8,0,5.233829180399577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,4,128,0,1,fp8,fp8,0,4.028821309407552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,float16,0,5.201007843017578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,128,0,1,float16,fp8,0,5.239514668782552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,24,8,128,0,1,fp8,fp8,0,4.0723520914713545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,float16,0,2.663621266682943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,128,0,1,float16,fp8,0,2.67792542775472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,24,128,0,1,fp8,fp8,0,2.2086453437805176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,float16,0,2.4962719281514487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,128,0,1,float16,fp8,0,2.505194664001465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,1,128,0,1,fp8,fp8,0,2.06222931543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,float16,0,2.5134506225585938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,128,0,1,float16,fp8,0,2.5156426429748535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,2,128,0,1,fp8,fp8,0,2.0783252716064453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,float16,0,2.4969919522603354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,128,0,1,float16,fp8,0,2.541055997212728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,4,128,0,1,fp8,fp8,0,2.083733399709066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,float16,0,2.557274659474691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,128,0,1,float16,fp8,0,2.560704072316488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,24,8,128,0,1,fp8,fp8,0,2.1048213640848794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,float16,0,1.4066400527954102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,128,0,1,fp8,fp8,0,1.1795360247294109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,float16,0,1.3178079922993977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,24,128,0,1,float16,fp8,0,1.434725284576416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,128,0,1,fp8,fp8,0,1.1150026321411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,1,128,0,1,float16,fp8,0,1.314677317937215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,float16,0,1.3285600344340007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,128,0,1,fp8,fp8,0,1.1160906950632732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,2,128,0,1,float16,fp8,0,1.326037327448527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,float16,0,1.318021297454834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,128,0,1,float16,fp8,0,1.3428959846496582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,4,128,0,1,fp8,fp8,0,1.1672320365905762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,float16,0,1.3466986020406086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,128,0,1,float16,fp8,0,1.3503306706746419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,float16,0,0.7865706284840902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,24,8,128,0,1,fp8,fp8,0,1.130570650100708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,128,0,1,float16,fp8,0,0.8150826295216879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,24,128,0,1,fp8,fp8,0,0.6735413074493408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,float16,0,0.736245314280192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,128,0,1,fp8,fp8,0,0.6417493422826132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,1,128,0,1,float16,fp8,0,0.7341333230336508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,float16,0,0.746346632639567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,128,0,1,float16,fp8,0,0.7443733215332031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,2,128,0,1,fp8,fp8,0,0.643834670384725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,float16,0,0.7493226528167725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,128,0,1,float16,fp8,0,0.7543253103892008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,4,128,0,1,fp8,fp8,0,0.6460320154825846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,float16,0,0.7504213651021322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,128,0,1,fp8,fp8,0,0.6533279816309611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,24,8,128,0,1,float16,fp8,0,0.7617653210957845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,128,0,1,fp8,fp8,0,2.9254401524861655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,float16,0,3.5447521209716797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,1,128,0,1,float16,fp8,0,3.5147412618001304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,float16,0,3.658405303955078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,128,0,1,fp8,fp8,0,2.9386507670084634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,2,128,0,1,float16,fp8,0,3.6335573196411133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,float16,0,3.589280128479004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,128,0,1,float16,fp8,0,3.626703898111979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,4,128,0,1,fp8,fp8,0,2.9630613327026367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,float16,0,3.6755733489990234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,128,0,1,fp8,fp8,0,2.9808266957600913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,24,8,128,0,1,float16,fp8,0,3.7301600774129233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,float16,0,1.9576266606648762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,128,0,1,fp8,fp8,0,1.6287892659505208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,24,128,0,1,float16,fp8,0,2.0234986941019693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,float16,0,1.8254186312357585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,128,0,1,float16,fp8,0,1.802677313486735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,1,128,0,1,fp8,fp8,0,1.5287359555562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,float16,0,1.8542933464050293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,128,0,1,float16,fp8,0,1.8527466456095378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,2,128,0,1,fp8,fp8,0,1.6122132937113445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,float16,0,1.828719933827718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,128,0,1,float16,fp8,0,1.8795040448506672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,4,128,0,1,fp8,fp8,0,1.5830613772074382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,float16,0,1.8570666313171387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,float16,0,1.0426666736602783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,128,0,1,float16,fp8,0,1.8871253331502278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,24,8,128,0,1,fp8,fp8,0,1.664031982421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,128,0,1,float16,fp8,0,1.0721173286437988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,24,128,0,1,fp8,fp8,0,0.8834613164265951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,float16,0,0.9843786557515463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,128,0,1,float16,fp8,0,0.9756906827290853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,1,128,0,1,fp8,fp8,0,0.8851146697998047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,float16,0,0.9691413243611654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,128,0,1,float16,fp8,0,0.9858187039693197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,2,128,0,1,fp8,fp8,0,0.831653356552124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,float16,0,0.9856800238291422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,128,0,1,fp8,fp8,0,0.84115203221639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,4,128,0,1,float16,fp8,0,0.9801119963328043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,float16,0,1.0002720355987549
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,128,0,1,float16,fp8,0,1.0014453728993733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,float16,0,0.5972479979197184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,128,0,1,float16,fp8,0,0.605685313542684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,24,8,128,0,1,fp8,fp8,0,0.8487892945607504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,24,128,0,1,fp8,fp8,0,0.5152213176091512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,float16,0,0.5462986628214518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,128,0,1,float16,fp8,0,0.5517173210779825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,1,128,0,1,fp8,fp8,0,0.488597313563029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,float16,0,0.5538933277130127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,128,0,1,fp8,fp8,0,0.48875733216603595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,2,128,0,1,float16,fp8,0,0.5513439973195394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,float16,0,0.5640906492869059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,128,0,1,float16,fp8,0,0.5670080184936523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,4,128,0,1,fp8,fp8,0,0.4922720193862915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,float16,0,0.5679893493652344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,128,0,1,float16,fp8,0,0.573082685470581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,24,8,128,0,1,fp8,fp8,0,0.4968053499857585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,128,0,1,fp8,fp8,0,3.949317296346029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,float16,0,4.769541422526042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,1,128,0,1,float16,fp8,0,4.910880088806152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,float16,0,4.832346598307292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,128,0,1,fp8,fp8,0,3.9748427073160806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,2,128,0,1,float16,fp8,0,4.86788272857666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,float16,0,4.931935946146647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,128,0,1,float16,fp8,0,4.846080144246419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,4,128,0,1,fp8,fp8,0,4.009653409322103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,float16,0,5.040197372436523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,128,0,1,fp8,fp8,0,4.037722587585449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,24,8,128,0,1,float16,fp8,0,4.942538579305013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,float16,0,2.650207996368408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,128,0,1,fp8,fp8,0,2.177274703979492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,24,128,0,1,float16,fp8,0,2.71614933013916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,float16,0,2.392848014831543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,128,0,1,fp8,fp8,0,2.0196480751037598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,1,128,0,1,float16,fp8,0,2.3808107376098633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,float16,0,2.446282704671224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,128,0,1,fp8,fp8,0,2.016613324483236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,2,128,0,1,float16,fp8,0,2.4320319493611655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,float16,0,2.453338623046875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,128,0,1,fp8,fp8,0,2.0301920572916665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,4,128,0,1,float16,fp8,0,2.442837397257487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,float16,0,2.4830880165100098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,float16,0,1.3649919827779133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,128,0,1,float16,fp8,0,2.550874710083008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,24,8,128,0,1,fp8,fp8,0,2.0574132601420083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,128,0,1,float16,fp8,0,1.3901972770690918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,24,128,0,1,fp8,fp8,0,1.1455946763356526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,float16,0,1.2860960165659587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,128,0,1,fp8,fp8,0,1.0657920042673747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,1,128,0,1,float16,fp8,0,1.2340532938639324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,float16,0,1.2635146776835124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,128,0,1,float16,fp8,0,1.2721386750539143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,2,128,0,1,fp8,fp8,0,1.0667253335316975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,float16,0,1.2702986399332683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,128,0,1,float16,fp8,0,1.272597312927246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,4,128,0,1,fp8,fp8,0,1.0745226542154949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,float16,0,1.2851786613464355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,128,0,1,float16,fp8,0,1.2885440190633137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,float16,0,0.7475787003835043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,24,8,128,0,1,fp8,fp8,0,1.1084907054901123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,128,0,1,float16,fp8,0,0.7546719710032145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,24,128,0,1,fp8,fp8,0,0.644373337427775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,float16,0,0.6785973707834879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,128,0,1,fp8,fp8,0,0.5978026787439982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,1,128,0,1,float16,fp8,0,0.6805760065714518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,float16,0,0.6793333689371744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,128,0,1,float16,fp8,0,0.6864746411641439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,2,128,0,1,fp8,fp8,0,0.5941226482391357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,float16,0,0.6905120213826498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,128,0,1,float16,fp8,0,0.6890666484832764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,4,128,0,1,fp8,fp8,0,0.5979839960734049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,float16,0,0.7025173505147299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,128,0,1,float16,fp8,0,0.7049386501312256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,24,8,128,0,1,fp8,fp8,0,0.6021600166956583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,float16,0,0.4362613360087077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,128,0,1,float16,fp8,0,0.43887468179066974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,24,128,0,1,fp8,fp8,0,0.3763999938964844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,float16,0,0.38787734508514404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,128,0,1,float16,fp8,0,0.3906186819076538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,1,128,0,1,fp8,fp8,0,0.35155200958251953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,float16,0,0.39120535055796307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,128,0,1,fp8,fp8,0,0.35292800267537433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,2,128,0,1,float16,fp8,0,0.39044801394144696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,float16,0,0.39747198422749835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,128,0,1,float16,fp8,0,0.4018666744232178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,4,128,0,1,fp8,fp8,0,0.3580746650695801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,float16,0,0.40848533312479657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,128,0,1,float16,fp8,0,0.4118773142496745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,24,8,128,0,1,fp8,fp8,0,0.35922133922576904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,float16,0,2.9254560470581055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,128,0,1,float16,fp8,0,2.9151414235432944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,1,128,0,1,fp8,fp8,0,2.4656480153401694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,float16,0,2.9508374532063804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,128,0,1,fp8,fp8,0,2.4774293899536133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,2,128,0,1,float16,fp8,0,2.9649600982666016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,float16,0,2.964186668395996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,128,0,1,float16,fp8,0,2.9974241256713867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,4,128,0,1,fp8,fp8,0,2.49837334950765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,float16,0,3.0589173634847007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,128,0,1,float16,fp8,0,3.0598185857137046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,24,8,128,0,1,fp8,fp8,0,2.5426026980082193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,float16,0,1.6569013595581055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,128,0,1,fp8,fp8,0,1.3994560241699219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,24,128,0,1,float16,fp8,0,1.7075999577840169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,float16,0,1.482159932454427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,128,0,1,float16,fp8,0,1.488650639851888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,1,128,0,1,fp8,fp8,0,1.2706507047017415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,float16,0,1.5038506189982097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,128,0,1,float16,fp8,0,1.5113120079040527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,2,128,0,1,fp8,fp8,0,1.303877353668213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,float16,0,1.5120426813761394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,128,0,1,fp8,fp8,0,1.2898186842600505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,4,128,0,1,float16,fp8,0,1.535568078358968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,float16,0,1.5337653160095215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,float16,0,0.8899573485056559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,128,0,1,fp8,fp8,0,1.3090186913808186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,24,8,128,0,1,float16,fp8,0,1.5505119959513347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,128,0,1,float16,fp8,0,0.900170644124349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,24,128,0,1,fp8,fp8,0,0.7470506827036539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,float16,0,0.7869599660237631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,128,0,1,float16,fp8,0,0.8025546868642172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,1,128,0,1,fp8,fp8,0,0.678816000620524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,float16,0,0.7947680155436198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,128,0,1,float16,fp8,0,0.796837329864502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,2,128,0,1,fp8,fp8,0,0.6844320297241211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,float16,0,0.7941973209381104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,128,0,1,float16,fp8,0,0.8051786422729492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,4,128,0,1,fp8,fp8,0,0.6894240379333496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,float16,0,0.8139146963755289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,128,0,1,float16,fp8,0,0.8139466444651285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,float16,0,0.4886560042699178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,24,8,128,0,1,fp8,fp8,0,0.699514627456665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,128,0,1,float16,fp8,0,0.4932639996210734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,24,128,0,1,fp8,fp8,0,0.4211519956588745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,float16,0,0.43439467748006183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,128,0,1,float16,fp8,0,0.43250131607055664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,1,128,0,1,fp8,fp8,0,0.3885546525319417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,float16,0,0.4386666615804036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,128,0,1,float16,fp8,0,0.4376213153203328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,2,128,0,1,fp8,fp8,0,0.38977599143981934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,float16,0,0.44729065895080566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,128,0,1,float16,fp8,0,0.44259734948476154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,4,128,0,1,fp8,fp8,0,0.3917653163274129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,float16,0,0.45468799273173016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,128,0,1,float16,fp8,0,0.4539039929707845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,24,8,128,0,1,fp8,fp8,0,0.3968799908955892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,float16,0,0.29393066962560016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,128,0,1,float16,fp8,0,0.29772265752156574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,24,128,0,1,fp8,fp8,0,0.255786657333374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,float16,0,0.25787200530370075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,128,0,1,float16,fp8,0,0.25947733720143634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,1,128,0,1,fp8,fp8,0,0.2326293389002482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,float16,0,0.258026659488678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,128,0,1,fp8,fp8,0,0.23361066977183023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,2,128,0,1,float16,fp8,0,0.25757867097854614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,float16,0,0.25938133398691815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,128,0,1,float16,fp8,0,0.2603360017140706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,4,128,0,1,fp8,fp8,0,0.23841599623362222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,float16,0,0.2680373390515645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,128,0,1,fp8,fp8,0,0.24363734324773154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,24,8,128,0,1,float16,fp8,0,0.2673226594924927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,float16,0,3.043333371480306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,128,0,1,fp8,fp8,0,2.603994687398275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,1,128,0,1,float16,fp8,0,3.016592025756836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,float16,0,3.0655787785847983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,128,0,1,fp8,fp8,0,2.6277333895365396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,2,128,0,1,float16,fp8,0,3.0918347040812173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,float16,0,3.078864097595215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,128,0,1,float16,fp8,0,3.124314626057943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,4,128,0,1,fp8,fp8,0,2.651653289794922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,float16,0,3.1894667943318686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,128,0,1,float16,fp8,0,3.209808031717936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,24,8,128,0,1,fp8,fp8,0,2.703488032023112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,float16,0,1.748960018157959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,128,0,1,float16,fp8,0,1.7676533063252766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,24,128,0,1,fp8,fp8,0,1.5459359486897786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,float16,0,1.5245280265808105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,128,0,1,float16,fp8,0,1.526426633199056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,1,128,0,1,fp8,fp8,0,1.3143946329752605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,float16,0,1.5242560704549153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,128,0,1,float16,fp8,0,1.5425653457641602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,2,128,0,1,fp8,fp8,0,1.3403147061665852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,float16,0,1.5514133771260579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,128,0,1,float16,fp8,0,1.5533013343811035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,4,128,0,1,fp8,fp8,0,1.3398987452189128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,float16,0,1.5915733973185222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,float16,0,0.9085439840952555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,128,0,1,float16,fp8,0,1.600096066792806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,24,8,128,0,1,fp8,fp8,0,1.365898609161377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,128,0,1,float16,fp8,0,0.9214666684468588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,24,128,0,1,fp8,fp8,0,0.7824640274047852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,float16,0,0.7949546972910563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,128,0,1,fp8,fp8,0,0.6898132960001627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,1,128,0,1,float16,fp8,0,0.7860906918843588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,float16,0,0.7994240125020345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,128,0,1,fp8,fp8,0,0.6951626936594645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,2,128,0,1,float16,fp8,0,0.8033920129140218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,float16,0,0.8023839791615804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,128,0,1,float16,fp8,0,0.8134506543477377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,4,128,0,1,fp8,fp8,0,0.7013440132141113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,float16,0,0.8293120066324869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,128,0,1,float16,fp8,0,0.8313173453013102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,24,8,128,0,1,fp8,fp8,0,0.7182026704152426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,float16,0,0.4894453287124634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,128,0,1,float16,fp8,0,0.4967679977416992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,24,128,0,1,fp8,fp8,0,0.43352532386779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,float16,0,0.4274880091349284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,128,0,1,float16,fp8,0,0.42681066195170086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,1,128,0,1,fp8,fp8,0,0.37963199615478516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,float16,0,0.42798399925231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,128,0,1,float16,fp8,0,0.43353064854939777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,2,128,0,1,fp8,fp8,0,0.3818399906158447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,float16,0,0.43905067443847656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,128,0,1,float16,fp8,0,0.4358559846878052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,4,128,0,1,fp8,fp8,0,0.3863626718521118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,float16,0,0.448794682820638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,128,0,1,float16,fp8,0,0.45033065478007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,24,8,128,0,1,fp8,fp8,0,0.39209600289662677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,float16,0,0.2819040020306905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,128,0,1,float16,fp8,0,0.2843093276023865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,24,128,0,1,fp8,fp8,0,0.24475733439127603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,float16,0,0.23904534180959067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,128,0,1,float16,fp8,0,0.24312533934911093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,1,128,0,1,fp8,fp8,0,0.2197279930114746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,float16,0,0.23947733640670776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,128,0,1,float16,fp8,0,0.24243199825286865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,2,128,0,1,fp8,fp8,0,0.22266666094462076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,float16,0,0.24585066239039102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,128,0,1,float16,fp8,0,0.24699199199676514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,4,128,0,1,fp8,fp8,0,0.22509332497914633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,float16,0,0.255568007628123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,128,0,1,float16,fp8,0,0.25781865914662677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,24,8,128,0,1,fp8,fp8,0,0.22896534204483032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,float16,0,0.17230933904647827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,128,0,1,float16,fp8,0,0.17197867234547934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,24,128,0,1,fp8,fp8,0,0.1532693306605021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,float16,0,0.15145599842071533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,128,0,1,float16,fp8,0,0.15261866648991904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,1,128,0,1,fp8,fp8,0,0.14049599568049112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,float16,0,0.15179733435312906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,128,0,1,float16,fp8,0,0.1530346671740214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,2,128,0,1,fp8,fp8,0,0.1414293348789215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,float16,0,0.15282666683197021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,128,0,1,fp8,fp8,0,0.14174399773279825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,4,128,0,1,float16,fp8,0,0.154341330130895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,float16,0,0.15468266606330872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,128,0,1,float16,fp8,0,0.15401066342989603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,24,8,128,0,1,fp8,fp8,0,0.14402666687965393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,128,0,1,fp8,fp8,0,1.6997653643290203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,float16,0,1.944976011912028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,1,128,0,1,float16,fp8,0,1.939018726348877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,128,0,1,fp8,fp8,0,1.7150079409281414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,float16,0,1.9630667368570964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,2,128,0,1,float16,fp8,0,1.9703413645426433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,128,0,1,fp8,fp8,0,1.7337013880411785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,fp8,0,1.9868052800496419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,4,128,0,1,float16,float16,0,1.9925920168558757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,float16,0,2.0460640589396157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,float16,0,1.1437866687774658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,128,0,1,float16,fp8,0,1.1560693581899006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,128,0,1,fp8,fp8,0,1.779754638671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,24,8,128,0,1,float16,fp8,0,2.053050676981608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,24,128,0,1,fp8,fp8,0,0.9921546777089437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,float16,0,0.9905760288238525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,128,0,1,fp8,fp8,0,0.8672746817270914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,1,128,0,1,float16,fp8,0,0.9885226885477701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,float16,0,1.0030986467997234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,128,0,1,float16,fp8,0,1.0046772956848145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,2,128,0,1,fp8,fp8,0,0.8708799680074056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,float16,0,1.0079999764760335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,128,0,1,fp8,fp8,0,0.8833759625752767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,4,128,0,1,float16,fp8,0,1.0201760133107503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,float16,0,1.0435360272725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,128,0,1,float16,fp8,0,1.0451306502024333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,float16,0,0.5998613437016805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,24,8,128,0,1,fp8,fp8,0,0.9040160179138184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,128,0,1,float16,fp8,0,0.6092960039774576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,24,128,0,1,fp8,fp8,0,0.5255733331044515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,float16,0,0.5236053466796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,128,0,1,float16,fp8,0,0.522704005241394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,1,128,0,1,fp8,fp8,0,0.46143468221028644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,float16,0,0.5242240031560262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,128,0,1,float16,fp8,0,0.5299786726633707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,2,128,0,1,fp8,fp8,0,0.46546133359273273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,float16,0,0.5318880081176758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,128,0,1,float16,fp8,0,0.535157322883606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,4,128,0,1,fp8,fp8,0,0.4700640042622884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,float16,0,0.5487840175628662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,128,0,1,float16,fp8,0,0.5541919867197672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,float16,0,0.32966933647791546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,24,8,128,0,1,fp8,fp8,0,0.47893333435058594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,128,0,1,float16,fp8,0,0.33530668417612713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,float16,0,0.2843466599782308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,24,128,0,1,fp8,fp8,0,0.2903253237406413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,128,0,1,float16,fp8,0,0.28277866045633954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,1,128,0,1,fp8,fp8,0,0.2593173384666443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,float16,0,0.28758933146794635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,128,0,1,float16,fp8,0,0.2886773347854614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,2,128,0,1,fp8,fp8,0,0.2584106723467509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,float16,0,0.2923733393351237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,128,0,1,float16,fp8,0,0.29444799820582074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,4,128,0,1,fp8,fp8,0,0.26310400168100995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,float16,0,0.3010666569073995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,float16,0,0.19301867485046387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,128,0,1,float16,fp8,0,0.3035893241564433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,24,8,128,0,1,fp8,fp8,0,0.26713067293167114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,128,0,1,float16,fp8,0,0.19665066401163736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,24,128,0,1,fp8,fp8,0,0.17181867361068726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,fp8,0,0.16566933194796243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,128,0,1,float16,float16,0,0.16156799594561258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,1,128,0,1,fp8,fp8,0,0.14863466223080954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,float16,0,0.16299200057983398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,128,0,1,float16,fp8,0,0.16315199931462607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,2,128,0,1,fp8,fp8,0,0.14991466204325357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,float16,0,0.16500266393025717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,128,0,1,fp8,fp8,0,0.15588800112406412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,4,128,0,1,float16,fp8,0,0.16657599806785583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,float16,0,0.17173866430918375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,128,0,1,float16,fp8,0,0.17314666509628296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,float16,0,0.11685333649317424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,24,8,128,0,1,fp8,fp8,0,0.16114133596420288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,128,0,1,float16,fp8,0,0.11723732948303223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,24,128,0,1,fp8,fp8,0,0.11179733276367188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,fp8,0,0.10991999506950378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,128,0,1,float16,float16,0,0.11052800218264262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,1,128,0,1,fp8,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,float16,0,0.10943999886512756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,128,0,1,float16,fp8,0,0.11061333616574605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,2,128,0,1,fp8,fp8,0,0.1034879982471466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,float16,0,0.10963199536005656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,128,0,1,float16,fp8,0,0.11193600296974182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,4,128,0,1,fp8,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,float16,0,0.10973866780598958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,128,0,1,float16,fp8,0,0.11264533797899882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,24,8,128,0,1,fp8,fp8,0,0.10374400019645691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,128,0,1,fp8,fp8,0,1.9260160128275554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,float16,0,2.2131306330362954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,1,128,0,1,float16,fp8,0,2.2224532763163247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,128,0,1,fp8,fp8,0,1.9462292989095051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,float16,0,2.214933395385742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,2,128,0,1,float16,fp8,0,2.2351306279500327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,128,0,1,fp8,fp8,0,1.9938079516092937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,float16,0,2.2968907356262207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,4,128,0,1,float16,fp8,0,2.290773391723633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,float16,0,1.2697866757710774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,float16,0,2.318112055460612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,128,0,1,fp8,fp8,0,2.010159969329834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,24,8,128,0,1,float16,fp8,0,2.3262346585591636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,128,0,1,float16,fp8,0,1.288863976796468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,float16,0,1.0961120128631592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,24,128,0,1,fp8,fp8,0,1.124837319056193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,128,0,1,float16,fp8,0,1.0965387026468914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,1,128,0,1,fp8,fp8,0,0.9636586507161459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,float16,0,1.1072159608205159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,128,0,1,fp8,fp8,0,0.9768053690592448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,2,128,0,1,float16,fp8,0,1.1062560081481934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,float16,0,1.122330665588379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,128,0,1,float16,fp8,0,1.1316640377044678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,4,128,0,1,fp8,fp8,0,0.987930695215861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,float16,0,1.1582187016805012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,128,0,1,float16,fp8,0,1.1694080034891765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,float16,0,0.65720001856486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,24,8,128,0,1,fp8,fp8,0,1.0160586833953857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,128,0,1,float16,fp8,0,0.6655466556549072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,24,128,0,1,fp8,fp8,0,0.5822506745656332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,float16,0,0.5659840106964111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,128,0,1,fp8,fp8,0,0.5029386679331461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,1,128,0,1,float16,fp8,0,0.5684640010197958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,float16,0,0.5719360113143921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,128,0,1,float16,fp8,0,0.574837327003479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,2,128,0,1,fp8,fp8,0,0.5094346602757772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,float16,0,0.5827253262201945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,128,0,1,fp8,fp8,0,0.5139679908752441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,4,128,0,1,float16,fp8,0,0.5863626797993978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,float16,0,0.6016000111897787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,128,0,1,float16,fp8,0,0.6083626747131348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,float16,0,0.3510986566543579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,24,8,128,0,1,fp8,fp8,0,0.5262986818949381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,128,0,1,fp8,fp8,0,0.31302400430043537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,24,128,0,1,float16,fp8,0,0.35865068435668945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,float16,0,0.30086400111516315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,128,0,1,float16,fp8,0,0.30317866802215576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,1,128,0,1,fp8,fp8,0,0.27342933416366577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,float16,0,0.3065440058708191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,128,0,1,float16,fp8,0,0.3068213264147441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,2,128,0,1,fp8,fp8,0,0.2758026719093323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,float16,0,0.31148266792297363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,128,0,1,float16,fp8,0,0.31270400683085126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,4,128,0,1,fp8,fp8,0,0.27875733375549316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,float16,0,0.3232746720314026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,128,0,1,float16,fp8,0,0.3259893258412679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,float16,0,0.1992853283882141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,24,8,128,0,1,fp8,fp8,0,0.28521599372227985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,128,0,1,fp8,fp8,0,0.17796266078948975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,24,128,0,1,float16,fp8,0,0.2024959921836853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,float16,0,0.166485329469045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,128,0,1,float16,fp8,0,0.1653333306312561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,1,128,0,1,fp8,fp8,0,0.15470932920773825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,float16,0,0.16736533244450888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,128,0,1,float16,fp8,0,0.1680906613667806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,2,128,0,1,fp8,fp8,0,0.15728533267974854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,float16,0,0.1709280014038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,128,0,1,float16,fp8,0,0.1722559928894043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,4,128,0,1,fp8,fp8,0,0.158842662970225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,float16,0,0.17986132701237997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,128,0,1,float16,fp8,0,0.1827253301938375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,24,8,128,0,1,fp8,fp8,0,0.1625706652800242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,float16,0,0.11885333061218262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,128,0,1,float16,fp8,0,0.1186293363571167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,24,128,0,1,fp8,fp8,0,0.10974933703740437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,float16,0,0.10291733344395955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,128,0,1,float16,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,float16,0,0.1046506663163503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,1,128,0,1,fp8,fp8,0,0.09265066186587016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,128,0,1,float16,fp8,0,0.10382399956385295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,2,128,0,1,fp8,fp8,0,0.0940106709798177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,float16,0,0.1037013332049052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,128,0,1,float16,fp8,0,0.1032426655292511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,4,128,0,1,fp8,fp8,0,0.09482666850090027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,float16,0,0.10496532917022705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,128,0,1,float16,fp8,0,0.10527466734250386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,24,8,128,0,1,fp8,fp8,0,0.09744000434875488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,float16,0,0.07592533528804779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,128,0,1,float16,fp8,0,0.0768693337837855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,24,128,0,1,fp8,fp8,0,0.07178666690985362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,float16,0,0.07461866736412048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,128,0,1,float16,fp8,0,0.07454399764537811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,float16,0,0.07403733332951863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,1,128,0,1,fp8,fp8,0,0.06835199892520905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,128,0,1,float16,fp8,0,0.07482133309046428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,2,128,0,1,fp8,fp8,0,0.06876266499360402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,float16,0,0.07379733522733052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,128,0,1,float16,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,4,128,0,1,fp8,fp8,0,0.0690880020459493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,float16,0,0.07454933226108551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,128,0,1,float16,fp8,0,0.07450133562088013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,24,8,128,0,1,fp8,fp8,0,0.07017600039641063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,128,0,1,fp8,fp8,0,1.3308693567911785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,float16,0,1.5015467007954915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,1,128,0,1,float16,fp8,0,1.5110294024149578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,128,0,1,fp8,fp8,0,1.3454772631327312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,float16,0,1.530618667602539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,2,128,0,1,float16,fp8,0,1.5277706782023113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,128,0,1,fp8,fp8,0,1.381754716237386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,float16,0,1.568394660949707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,4,128,0,1,float16,fp8,0,1.5772533416748047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,128,0,1,fp8,fp8,0,1.3949653307596843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,float16,0,1.587765375773112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,24,8,128,0,1,float16,fp8,0,1.5963412920633953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,fp8,0,0.8943413098653158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,128,0,1,fp8,fp8,0,0.7850293318430582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,float16,0,0.7525226275126139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,24,128,0,1,float16,float16,0,0.8847520351409912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,128,0,1,fp8,fp8,0,0.669333299001058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,1,128,0,1,float16,fp8,0,0.7595199743906657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,float16,0,0.7611893018086752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,128,0,1,float16,fp8,0,0.7666186491648356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,2,128,0,1,fp8,fp8,0,0.6777493158976237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,float16,0,0.7752426465352377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,128,0,1,float16,fp8,0,0.7791146437327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,4,128,0,1,fp8,fp8,0,0.6869280338287354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,float16,0,0.800544023513794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,float16,0,0.46085866292317706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,128,0,1,float16,fp8,0,0.8071093559265137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,24,8,128,0,1,fp8,fp8,0,0.7058613300323486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,128,0,1,fp8,fp8,0,0.411135991414388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,24,128,0,1,float16,fp8,0,0.4654879967371623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,float16,0,0.39320000012715656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,128,0,1,float16,fp8,0,0.3941333293914795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,1,128,0,1,fp8,fp8,0,0.3519306580225627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,128,0,1,fp8,fp8,0,0.3561280171076457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,fp8,0,0.3991039991378784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,2,128,0,1,float16,float16,0,0.39632534980773926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,float16,0,0.40354132652282715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,128,0,1,fp8,fp8,0,0.36052799224853516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,4,128,0,1,float16,fp8,0,0.40751465161641437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,float16,0,0.4171946843465169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,float16,0,0.2481493353843689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,128,0,1,float16,fp8,0,0.2530133326848348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,128,0,1,fp8,fp8,0,0.37060264746348065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,24,8,128,0,1,float16,fp8,0,0.42127466201782227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,24,128,0,1,fp8,fp8,0,0.22427733739217123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,float16,0,0.2111413280169169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,128,0,1,float16,fp8,0,0.21244800090789795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,1,128,0,1,fp8,fp8,0,0.19381866852442423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,float16,0,0.21422400077184042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,128,0,1,fp8,fp8,0,0.19556266069412231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,2,128,0,1,float16,fp8,0,0.21516799926757812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,float16,0,0.21826666593551636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,128,0,1,float16,fp8,0,0.21967466672261557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,4,128,0,1,fp8,fp8,0,0.19735999902089438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,float16,0,0.22689600785573324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,128,0,1,float16,fp8,0,0.23040000597635904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,float16,0,0.14178133010864258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,24,8,128,0,1,fp8,fp8,0,0.20389866828918457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,128,0,1,float16,fp8,0,0.14518400033315024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,24,128,0,1,fp8,fp8,0,0.13011200229326883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,float16,0,0.11797333757082622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,128,0,1,fp8,fp8,0,0.10637332995732625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,1,128,0,1,float16,fp8,0,0.11805333693822224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,float16,0,0.11757866541544597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,128,0,1,float16,fp8,0,0.11964266498883565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,2,128,0,1,fp8,fp8,0,0.10761066277821858
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,float16,0,0.11987732847531636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,128,0,1,float16,fp8,0,0.12078932921091716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,4,128,0,1,fp8,fp8,0,0.11272533734639485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,float16,0,0.1253706713517507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,128,0,1,float16,fp8,0,0.12612266341845194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,float16,0,0.08270399769147237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,24,8,128,0,1,fp8,fp8,0,0.11868266264597575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,128,0,1,float16,fp8,0,0.08306666711966197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,24,128,0,1,fp8,fp8,0,0.08130133152008057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,float16,0,0.07650133470694225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,128,0,1,float16,fp8,0,0.07674666742483775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,1,128,0,1,fp8,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,float16,0,0.07692799965540568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,128,0,1,float16,fp8,0,0.07685333490371704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,2,128,0,1,fp8,fp8,0,0.06990399956703186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,float16,0,0.07681066791216533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,128,0,1,float16,fp8,0,0.07725866635640462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,4,128,0,1,fp8,fp8,0,0.07130133112271626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,float16,0,0.07707199951012929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,128,0,1,float16,fp8,0,0.07780266801516215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,float16,0,0.05417599777380625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,24,8,128,0,1,fp8,fp8,0,0.07259733478228252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,128,0,1,float16,fp8,0,0.054458667834599815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,24,128,0,1,fp8,fp8,0,0.0518506666024526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,float16,0,0.051957334081331887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,128,0,1,float16,fp8,0,0.05202666421731313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,1,128,0,1,fp8,fp8,0,0.04996799925963084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,float16,0,0.052255998055140175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,128,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,2,128,0,1,fp8,fp8,0,0.05013866722583771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,float16,0,0.05199466645717621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,128,0,1,float16,fp8,0,0.05429866909980774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,4,128,0,1,fp8,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,float16,0,0.05203733344872793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,128,0,1,float16,fp8,0,0.05422399938106537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,24,8,128,0,1,fp8,fp8,0,0.051914667089780174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,float16,0,1.5968480110168457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,128,0,1,float16,fp8,0,1.59442138671875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,1,128,0,1,fp8,fp8,0,1.5143413543701172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,float16,0,1.62171204884847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,128,0,1,float16,fp8,0,1.6289013226826985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,2,128,0,1,fp8,fp8,0,1.624735991160075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,float16,0,1.6566559473673503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,128,0,1,float16,fp8,0,1.6558133761088054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,4,128,0,1,fp8,fp8,0,1.6258613268534343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,float16,0,0.9671520392100016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,float16,0,1.7699839274088542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,128,0,1,fp8,fp8,0,1.688517411549886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,24,8,128,0,1,float16,fp8,0,1.7525386810302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,float16,0,0.8097813129425049
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,128,0,1,fp8,fp8,0,0.9142453670501709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,128,0,1,float16,fp8,0,0.8103733062744141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,24,128,0,1,float16,fp8,0,0.9436266422271729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,1,128,0,1,fp8,fp8,0,0.7680319945017496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,fp8,0,0.8232373396555582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,128,0,1,fp8,fp8,0,0.7833120028177897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,2,128,0,1,float16,float16,0,0.8225653171539307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,float16,0,0.8383626937866211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,128,0,1,fp8,fp8,0,0.7917280197143555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,4,128,0,1,float16,fp8,0,0.8381600379943848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,float16,0,0.8689813613891602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,float16,0,0.49563201268513996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,128,0,1,float16,fp8,0,0.4868160088857015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,128,0,1,float16,fp8,0,0.8601760069529215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,24,8,128,0,1,fp8,fp8,0,0.8390186627705892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,float16,0,0.415333350499471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,24,128,0,1,fp8,fp8,0,0.4692586660385132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,128,0,1,fp8,fp8,0,0.3932533264160156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,1,128,0,1,float16,fp8,0,0.4150986671447754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,float16,0,0.42303466796875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,128,0,1,float16,fp8,0,0.4222986698150635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,2,128,0,1,fp8,fp8,0,0.3985653320948283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,float16,0,0.4341653188069661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,128,0,1,float16,fp8,0,0.4305653174718221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,4,128,0,1,fp8,fp8,0,0.40655465920766193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,float16,0,0.44591466585795086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,128,0,1,float16,fp8,0,0.44356266657511395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,float16,0,0.26394667228062946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,24,8,128,0,1,fp8,fp8,0,0.41790934403737384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,128,0,1,float16,fp8,0,0.2584213415781657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,24,128,0,1,fp8,fp8,0,0.24816532929738364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,float16,0,0.21991467475891113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,128,0,1,float16,fp8,0,0.21946666638056436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,1,128,0,1,fp8,fp8,0,0.20353599389394125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,float16,0,0.2232746680577596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,128,0,1,float16,fp8,0,0.22300267219543457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,2,128,0,1,fp8,fp8,0,0.20939199129740396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,float16,0,0.22842133045196533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,128,0,1,float16,fp8,0,0.22567999362945557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,4,128,0,1,fp8,fp8,0,0.21307732661565146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,float16,0,0.2364799976348877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,128,0,1,float16,fp8,0,0.23578667640686035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,float16,0,0.14541866381963095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,128,0,1,float16,fp8,0,0.14309333761533102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,24,8,128,0,1,fp8,fp8,0,0.22018667062123617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,24,128,0,1,fp8,fp8,0,0.13607466220855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,fp8,0,0.11966400345166524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,128,0,1,float16,float16,0,0.11980266372362773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,1,128,0,1,fp8,fp8,0,0.11158399780591328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,float16,0,0.11989866693814595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,128,0,1,float16,fp8,0,0.12145066261291504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,2,128,0,1,fp8,fp8,0,0.11502933502197266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,float16,0,0.12392000357309978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,128,0,1,fp8,fp8,0,0.11764267086982727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,4,128,0,1,float16,fp8,0,0.12316800157229106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,float16,0,0.12985600034395853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,128,0,1,float16,fp8,0,0.12843733032544455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,float16,0,0.08474133412043254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,24,8,128,0,1,fp8,fp8,0,0.12227200468381245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,128,0,1,float16,fp8,0,0.08122133215268452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,24,128,0,1,fp8,fp8,0,0.08106666803359985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,fp8,0,0.07148799796899159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,128,0,1,float16,float16,0,0.07036266724268596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,1,128,0,1,fp8,fp8,0,0.06344000001748402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,float16,0,0.07082133491834004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,128,0,1,float16,fp8,0,0.07258666555086772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,2,128,0,1,fp8,fp8,0,0.0641599992911021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,float16,0,0.07070933282375336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,128,0,1,float16,fp8,0,0.07065600156784058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,4,128,0,1,fp8,fp8,0,0.06448000172773997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,float16,0,0.07225599884986877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,128,0,1,float16,fp8,0,0.07173333565394084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,24,8,128,0,1,fp8,fp8,0,0.06854933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,float16,0,0.05023466547330221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,128,0,1,float16,fp8,0,0.049925332268079124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,24,128,0,1,fp8,fp8,0,0.045642669002215065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,float16,0,0.04919999837875366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,128,0,1,float16,fp8,0,0.04885333279768626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,1,128,0,1,fp8,fp8,0,0.044031997521718345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,float16,0,0.04804266492525736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,128,0,1,float16,fp8,0,0.04943466683228811
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,2,128,0,1,fp8,fp8,0,0.044122666120529175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,float16,0,0.048309331138928734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,128,0,1,float16,fp8,0,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,4,128,0,1,fp8,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,float16,0,0.04930133124192556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,128,0,1,float16,fp8,0,0.049882665276527405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,24,8,128,0,1,fp8,fp8,0,0.044026667873064675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,float16,0,0.0354720006386439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,128,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,24,128,0,1,fp8,fp8,0,0.0317546675602595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,float16,0,0.03379199902216593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,128,0,1,float16,fp8,0,0.03311466674009959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,1,128,0,1,fp8,fp8,0,0.030218665798505146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,float16,0,0.03357866654793421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,128,0,1,float16,fp8,0,0.03341866781314214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,2,128,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,float16,0,0.03370666752258936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,128,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,4,128,0,1,fp8,fp8,0,0.031930667658646904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,float16,0,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,128,0,1,float16,fp8,0,0.033743999898433685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,24,8,128,0,1,fp8,fp8,0,0.031712000568707786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,128,0,1,fp8,fp8,0,1.3329066435496013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,128,0,1,float16,fp8,0,1.3921440442403157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,1,128,0,1,float16,float16,0,1.3933013280232747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,128,0,1,float16,float16,0,1.4440213839213054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,128,0,1,float16,fp8,0,1.4569706916809082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,2,128,0,1,fp8,fp8,0,1.4533599217732747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,128,0,1,float16,float16,0,1.472058614095052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,128,0,1,float16,fp8,0,1.4758933385213215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,4,128,0,1,fp8,fp8,0,1.4745279947916667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,128,0,1,float16,float16,0,0.8461759885152181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,128,0,1,float16,float16,0,1.5390453338623047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,128,0,1,fp8,fp8,0,1.5032480557759602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,24,8,128,0,1,float16,fp8,0,1.5328853925069172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,128,0,1,float16,fp8,0,0.8284053007761637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,24,128,0,1,fp8,fp8,0,0.822213331858317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,128,0,1,float16,fp8,0,0.7068800131479899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,128,0,1,float16,float16,0,0.7074027061462402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,1,128,0,1,fp8,fp8,0,0.6754559675852457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,128,0,1,float16,float16,0,0.7201226552327474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,128,0,1,fp8,fp8,0,0.6944800217946371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,2,128,0,1,float16,fp8,0,0.7200213273366293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,128,0,1,float16,float16,0,0.7312213579813639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,128,0,1,float16,fp8,0,0.7304746309916178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,4,128,0,1,fp8,fp8,0,0.7118986447652181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,128,0,1,float16,float16,0,0.7591626644134521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,128,0,1,float16,fp8,0,0.7516000270843506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,128,0,1,float16,float16,0,0.4354933500289917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,128,0,1,float16,fp8,0,0.4254666566848755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,24,8,128,0,1,fp8,fp8,0,0.7465279897054037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,24,128,0,1,fp8,fp8,0,0.42232000827789307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,128,0,1,float16,float16,0,0.3628106514612834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,128,0,1,float16,fp8,0,0.3625919818878174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,1,128,0,1,fp8,fp8,0,0.34754665692647296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,128,0,1,float16,float16,0,0.3715733289718628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,128,0,1,float16,fp8,0,0.3699359893798828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,2,128,0,1,fp8,fp8,0,0.35097066561381024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,128,0,1,float16,float16,0,0.3760266701380412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,128,0,1,float16,fp8,0,0.3758773406346639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,4,128,0,1,fp8,fp8,0,0.3610293467839559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,128,0,1,float16,float16,0,0.3880480130513509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,128,0,1,float16,fp8,0,0.3850613435109456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,24,8,128,0,1,fp8,fp8,0,0.3717866738637288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,128,0,1,float16,float16,0,0.23244265715281168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,128,0,1,float16,fp8,0,0.2260800004005432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,24,128,0,1,fp8,fp8,0,0.22165334224700928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,128,0,1,float16,float16,0,0.19298666715621948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,128,0,1,float16,fp8,0,0.19158933560053507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,1,128,0,1,fp8,fp8,0,0.17941333850224814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,128,0,1,float16,float16,0,0.19558932383855185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,128,0,1,float16,fp8,0,0.19619200627009073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,2,128,0,1,fp8,fp8,0,0.18390933672587076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,128,0,1,float16,float16,0,0.19783467054367065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,128,0,1,float16,fp8,0,0.19865065813064575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,4,128,0,1,fp8,fp8,0,0.1892426609992981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,128,0,1,float16,float16,0,0.20548800627390543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,128,0,1,float16,fp8,0,0.20397865772247314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,24,8,128,0,1,fp8,fp8,0,0.1960266629854838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,128,0,1,float16,float16,0,0.12847466270128885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,128,0,1,float16,fp8,0,0.12390399972597758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,24,128,0,1,fp8,fp8,0,0.12266666690508525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,128,0,1,float16,float16,0,0.10462400317192078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,128,0,1,float16,fp8,0,0.10546132922172546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,1,128,0,1,fp8,fp8,0,0.09742933511734009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,128,0,1,float16,float16,0,0.10548266768455505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,128,0,1,float16,fp8,0,0.1053013304869334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,2,128,0,1,fp8,fp8,0,0.10108799735705058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,128,0,1,float16,float16,0,0.10657067100207011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,128,0,1,float16,fp8,0,0.10762133200963338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,4,128,0,1,fp8,fp8,0,0.1037493348121643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,128,0,1,float16,float16,0,0.11127466956774394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,128,0,1,float16,float16,0,0.07319466769695282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,128,0,1,float16,fp8,0,0.11126400033632915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,24,8,128,0,1,fp8,fp8,0,0.1093386709690094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,128,0,1,float16,fp8,0,0.07017066578070323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,24,128,0,1,fp8,fp8,0,0.07338133454322815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,128,0,1,float16,float16,0,0.0621919979651769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,128,0,1,float16,fp8,0,0.062277331948280334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,128,0,1,float16,float16,0,0.062650665640831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,1,128,0,1,fp8,fp8,0,0.056688000758488975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,128,0,1,float16,fp8,0,0.06257600088914235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,2,128,0,1,fp8,fp8,0,0.05600533386071523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,128,0,1,float16,float16,0,0.0631520003080368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,128,0,1,float16,fp8,0,0.06281066437562306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,4,128,0,1,fp8,fp8,0,0.056832000613212585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,128,0,1,float16,float16,0,0.06452266871929169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,128,0,1,float16,fp8,0,0.06371200084686279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,128,0,1,float16,float16,0,0.04377066592375437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,24,8,128,0,1,fp8,fp8,0,0.059061333537101746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,128,0,1,float16,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,24,128,0,1,fp8,fp8,0,0.03992533435424169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,128,0,1,float16,float16,0,0.041349334021409355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,128,0,1,float16,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,1,128,0,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,128,0,1,float16,float16,0,0.041749333341916404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,128,0,1,float16,fp8,0,0.04298666616280874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,2,128,0,1,fp8,fp8,0,0.03770666569471359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,128,0,1,float16,float16,0,0.04155199974775314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,128,0,1,float16,fp8,0,0.042677332957585655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,4,128,0,1,fp8,fp8,0,0.0378560001651446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,128,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,128,0,1,float16,fp8,0,0.04326933125654856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,128,0,1,float16,float16,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,24,8,128,0,1,fp8,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,128,0,1,float16,fp8,0,0.030085332691669464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,24,128,0,1,fp8,fp8,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,128,0,1,float16,fp8,0,0.027664000789324444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,128,0,1,float16,float16,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,1,128,0,1,fp8,fp8,0,0.026074667771657307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,128,0,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,2,128,0,1,fp8,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,128,0,1,float16,float16,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,128,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,4,128,0,1,fp8,fp8,0,0.02757866680622101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,128,0,1,float16,float16,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,24,8,128,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,128,0,1,float16,float16,0,0.027077332139015198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,128,0,1,float16,fp8,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,128,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,24,128,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,128,0,1,float16,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,1,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,128,0,1,float16,float16,0,0.025573333104451496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,128,0,1,float16,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,128,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,2,128,0,1,fp8,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,4,128,0,1,float16,fp8,0,0.027552001178264618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,128,0,1,float16,float16,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,128,0,1,float16,fp8,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,24,8,128,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,128,0,1,float16,float16,0,0.6427146593729655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,128,0,1,fp8,fp8,0,0.6271093289057413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,1,128,0,1,float16,fp8,0,0.6422986586888632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,128,0,1,float16,float16,0,0.6526559988657633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,128,0,1,fp8,fp8,0,0.6364906628926595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,2,128,0,1,float16,fp8,0,0.6526133219401041
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,128,0,1,float16,float16,0,0.6665066480636597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,128,0,1,float16,fp8,0,0.6633813381195068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,4,128,0,1,fp8,fp8,0,0.645466685295105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,128,0,1,float16,float16,0,0.6969813505808512
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,128,0,1,float16,float16,0,0.4001386562983195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,128,0,1,float16,fp8,0,0.682533343633016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,128,0,1,float16,fp8,0,0.38938132921854657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,24,8,128,0,1,fp8,fp8,0,0.708725372950236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,24,128,0,1,fp8,fp8,0,0.39340798060099286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,128,0,1,float16,float16,0,0.3299573262532552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,128,0,1,float16,fp8,0,0.32916800181070965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,1,128,0,1,fp8,fp8,0,0.3205333352088928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,128,0,1,float16,float16,0,0.33535468578338623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,128,0,1,float16,fp8,0,0.33429865042368573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,2,128,0,1,fp8,fp8,0,0.3239946762720744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,128,0,1,float16,float16,0,0.3423200050989787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,128,0,1,float16,fp8,0,0.33963199456532794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,4,128,0,1,fp8,fp8,0,0.3317333261171977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,128,0,1,float16,float16,0,0.35339732964833576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,128,0,1,float16,fp8,0,0.3503146568934123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,128,0,1,float16,float16,0,0.21414399147033691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,24,8,128,0,1,fp8,fp8,0,0.34665600458780926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,128,0,1,float16,fp8,0,0.20945600668589273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,24,128,0,1,fp8,fp8,0,0.2101759910583496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,128,0,1,float16,float16,0,0.17659199237823486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,128,0,1,float16,fp8,0,0.17557867368062338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,1,128,0,1,fp8,fp8,0,0.16541866461435953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,128,0,1,float16,float16,0,0.17945067087809244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,128,0,1,float16,fp8,0,0.1793066660563151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,2,128,0,1,fp8,fp8,0,0.1705013314882914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,128,0,1,float16,float16,0,0.1818880041440328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,128,0,1,float16,fp8,0,0.18028799692789713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,4,128,0,1,fp8,fp8,0,0.1755626598993937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,128,0,1,float16,float16,0,0.18847999970118204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,128,0,1,float16,fp8,0,0.18639467159907022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,24,8,128,0,1,fp8,fp8,0,0.18309332927068075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,128,0,1,float16,float16,0,0.11877333124478658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,128,0,1,float16,fp8,0,0.1163200040658315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,24,128,0,1,fp8,fp8,0,0.11603200435638428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,128,0,1,float16,float16,0,0.0976639986038208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,128,0,1,float16,fp8,0,0.09878399968147278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,1,128,0,1,fp8,fp8,0,0.08938133716583252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,128,0,1,float16,float16,0,0.09899733463923137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,128,0,1,float16,fp8,0,0.09923733274141948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,2,128,0,1,fp8,fp8,0,0.09452266494433086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,128,0,1,float16,float16,0,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,128,0,1,float16,fp8,0,0.10020800431569417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,4,128,0,1,fp8,fp8,0,0.09667733311653137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,128,0,1,float16,float16,0,0.10502400000890096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,128,0,1,float16,fp8,0,0.10334400335947673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,24,8,128,0,1,fp8,fp8,0,0.10188266634941101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,128,0,1,float16,fp8,0,0.06507200002670288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,128,0,1,float16,float16,0,0.06628799935181935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,24,128,0,1,fp8,fp8,0,0.06919466455777486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,128,0,1,float16,float16,0,0.05795733133951823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,128,0,1,float16,fp8,0,0.056048000852266945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,128,0,1,float16,float16,0,0.05782933533191681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,1,128,0,1,fp8,fp8,0,0.05221866567929586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,128,0,1,float16,fp8,0,0.056757330894470215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,2,128,0,1,fp8,fp8,0,0.052576000491778054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,128,0,1,float16,float16,0,0.0582239975531896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,128,0,1,float16,fp8,0,0.056373332937558494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,4,128,0,1,fp8,fp8,0,0.05240533252557119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,128,0,1,float16,float16,0,0.058362667759259544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,128,0,1,float16,fp8,0,0.058335999647776283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,24,8,128,0,1,fp8,fp8,0,0.055434669057528176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,128,0,1,float16,float16,0,0.04008533308903376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,128,0,1,float16,fp8,0,0.041696002086003624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,24,128,0,1,fp8,fp8,0,0.03836799909671148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,128,0,1,float16,float16,0,0.03775466730197271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,128,0,1,float16,fp8,0,0.03805333375930786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,1,128,0,1,fp8,fp8,0,0.035445332527160645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,128,0,1,float16,float16,0,0.038880000511805214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,128,0,1,float16,fp8,0,0.03845866769552231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,2,128,0,1,fp8,fp8,0,0.03600533306598663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,128,0,1,float16,float16,0,0.03977066775163015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,128,0,1,float16,fp8,0,0.040048000713189445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,4,128,0,1,fp8,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,128,0,1,float16,float16,0,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,128,0,1,float16,fp8,0,0.039936001102129616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,24,8,128,0,1,fp8,fp8,0,0.036544000109036766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,128,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,128,0,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,24,128,0,1,fp8,fp8,0,0.02740799884001414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,128,0,1,fp8,fp8,0,0.024832000335057575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,128,0,1,float16,float16,0,0.027072000006834667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,1,128,0,1,float16,fp8,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,128,0,1,float16,float16,0,0.02565866708755493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,128,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,2,128,0,1,fp8,fp8,0,0.02404266595840454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,128,0,1,float16,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,4,128,0,1,fp8,fp8,0,0.025626666843891144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,128,0,1,float16,float16,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,128,0,1,float16,fp8,0,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,24,8,128,0,1,fp8,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,128,0,1,float16,float16,0,0.023365333676338196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,128,0,1,fp8,fp8,0,0.022304000953833263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,24,128,0,1,float16,fp8,0,0.025311999022960663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,128,0,1,float16,float16,0,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,128,0,1,float16,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,1,128,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,128,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,2,128,0,1,fp8,fp8,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,128,0,1,float16,float16,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,128,0,1,float16,fp8,0,0.025477332373460133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,128,0,1,float16,float16,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,4,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,128,0,1,float16,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,24,8,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,128,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,128,0,1,fp8,fp8,0,0.020608000457286835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,24,128,0,1,float16,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,128,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,1,128,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,128,0,1,float16,float16,0,0.021770666042963665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,128,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,2,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,128,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,128,0,1,float16,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,4,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,128,0,1,float16,float16,0,0.021829334398110706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,128,0,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,24,8,128,0,1,fp8,fp8,0,0.01893866683046023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,128,0,1,float16,float16,0,0.3542879819869995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,128,0,1,float16,fp8,0,0.35263999303181964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,1,128,0,1,fp8,fp8,0,0.3483946720759074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,128,0,1,float16,float16,0,0.3609120051066081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,128,0,1,float16,fp8,0,0.360640009244283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,2,128,0,1,fp8,fp8,0,0.35275201002756756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,128,0,1,float16,float16,0,0.36605334281921387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,128,0,1,float16,fp8,0,0.3662559986114502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,128,0,1,float16,float16,0,0.3770933151245117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,4,128,0,1,fp8,fp8,0,0.36035199960072833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,128,0,1,float16,float16,0,0.22196267048517862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,128,0,1,float16,fp8,0,0.37706132729848224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,24,8,128,0,1,fp8,fp8,0,0.3790666659673055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,128,0,1,float16,fp8,0,0.21708800395329794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,128,0,1,float16,float16,0,0.18625599145889282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,24,128,0,1,fp8,fp8,0,0.22197866439819336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,128,0,1,float16,fp8,0,0.187717338403066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,1,128,0,1,fp8,fp8,0,0.1791200041770935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,128,0,1,float16,float16,0,0.18973867098490396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,128,0,1,float16,fp8,0,0.18966400623321533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,2,128,0,1,fp8,fp8,0,0.18517333269119263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,128,0,1,float16,float16,0,0.19241599241892496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,128,0,1,float16,fp8,0,0.19156267245610556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,4,128,0,1,fp8,fp8,0,0.18811200062433878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,128,0,1,float16,float16,0,0.1976319948832194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,128,0,1,fp8,fp8,0,0.1970133384068807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,24,8,128,0,1,float16,fp8,0,0.19664533933003744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,128,0,1,float16,float16,0,0.12001066406567891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,128,0,1,float16,fp8,0,0.11739733815193176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,24,128,0,1,fp8,fp8,0,0.12017066280047099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,128,0,1,float16,float16,0,0.1030453344186147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,128,0,1,float16,fp8,0,0.10146666566530864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,1,128,0,1,fp8,fp8,0,0.09662933150927226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,128,0,1,float16,float16,0,0.10309333602587382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,128,0,1,float16,fp8,0,0.10356799761454265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,2,128,0,1,fp8,fp8,0,0.10044800241788228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,128,0,1,float16,float16,0,0.10573333501815796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,128,0,1,float16,fp8,0,0.1037013332049052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,4,128,0,1,fp8,fp8,0,0.10161599516868591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,128,0,1,float16,float16,0,0.10854933659235637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,128,0,1,float16,fp8,0,0.10760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,24,8,128,0,1,fp8,fp8,0,0.1076586643854777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,128,0,1,float16,float16,0,0.06894400219122569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,128,0,1,float16,fp8,0,0.06764266888300578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,24,128,0,1,fp8,fp8,0,0.07030400137106578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,128,0,1,float16,float16,0,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,128,0,1,float16,fp8,0,0.06046399970849355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,1,128,0,1,fp8,fp8,0,0.054341331124305725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,128,0,1,float16,float16,0,0.0602400004863739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,128,0,1,float16,fp8,0,0.05978133281071981
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,2,128,0,1,fp8,fp8,0,0.05563200016816457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,128,0,1,float16,float16,0,0.05997333427270254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,128,0,1,float16,fp8,0,0.06028800209363302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,4,128,0,1,fp8,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,128,0,1,float16,float16,0,0.06042666733264923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,128,0,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,128,0,1,float16,float16,0,0.04008000095685323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,24,8,128,0,1,fp8,fp8,0,0.0599839985370636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,128,0,1,float16,fp8,0,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,24,128,0,1,fp8,fp8,0,0.03792533278465271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,128,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,128,0,1,float16,fp8,0,0.037845333417256675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,1,128,0,1,fp8,fp8,0,0.03426666557788849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,128,0,1,float16,float16,0,0.03766400118668874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,128,0,1,float16,fp8,0,0.037791999677817024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,2,128,0,1,fp8,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,128,0,1,float16,float16,0,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,128,0,1,fp8,fp8,0,0.03449599941571554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,128,0,1,float16,float16,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,4,128,0,1,float16,fp8,0,0.038378665844599404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,128,0,1,float16,fp8,0,0.03756266583998998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,24,8,128,0,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,128,0,1,float16,float16,0,0.02787200113137563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,128,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,24,128,0,1,float16,fp8,0,0.02865600089232127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,128,0,1,float16,fp8,0,0.027808000644048054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,128,0,1,float16,float16,0,0.027973333994547527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,1,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,128,0,1,float16,fp8,0,0.0276053324341774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,128,0,1,fp8,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,2,128,0,1,float16,float16,0,0.02815466622511546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,128,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,128,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,4,128,0,1,fp8,fp8,0,0.02738133321205775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,128,0,1,float16,float16,0,0.02790933350721995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,128,0,1,float16,fp8,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,24,8,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,24,128,0,1,float16,fp8,0,0.021925332645575207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,128,0,1,float16,float16,0,0.01926400015751521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,128,0,1,float16,fp8,0,0.019589333484570186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,1,128,0,1,fp8,fp8,0,0.020928000410397846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,128,0,1,float16,float16,0,0.019776000330845516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,2,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,128,0,1,float16,float16,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,128,0,1,float16,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,128,0,1,float16,float16,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,4,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,128,0,1,float16,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,24,8,128,0,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,24,128,0,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,128,0,1,float16,float16,0,0.018730666488409042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,1,128,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,128,0,1,float16,float16,0,0.017680000513792038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,128,0,1,float16,fp8,0,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,2,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,128,0,1,float16,float16,0,0.01830400029818217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,4,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,128,0,1,float16,float16,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,128,0,1,float16,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,24,8,128,0,1,fp8,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,128,0,1,float16,float16,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,128,0,1,float16,fp8,0,0.017573333034912746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,24,128,0,1,fp8,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,128,0,1,float16,float16,0,0.017711999515692394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,128,0,1,float16,fp8,0,0.018031999468803406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,1,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,128,0,1,float16,float16,0,0.017621333400408428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,128,0,1,float16,fp8,0,0.01912533367673556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,128,0,1,float16,float16,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,128,0,1,float16,fp8,0,0.018709332992633183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,4,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,128,0,1,float16,float16,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,24,8,128,0,1,float16,fp8,0,0.018853332847356796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,128,0,1,float16,float16,0,0.23226666450500488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,128,0,1,fp8,fp8,0,0.21821333964665732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,1,128,0,1,float16,fp8,0,0.23175466060638428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,128,0,1,float16,float16,0,0.23375999927520752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,128,0,1,float16,fp8,0,0.23452800512313843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,2,128,0,1,fp8,fp8,0,0.22603732347488403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,128,0,1,float16,float16,0,0.23570134242375693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,128,0,1,fp8,fp8,0,0.22721600532531738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,4,128,0,1,float16,fp8,0,0.23696533838907877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,128,0,1,float16,float16,0,0.24157333374023438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,128,0,1,float16,fp8,0,0.24197334051132202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,24,8,128,0,1,fp8,fp8,0,0.2379466692606608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,128,0,1,float16,float16,0,0.14046933253606161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,128,0,1,float16,fp8,0,0.13980266451835632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,24,128,0,1,fp8,fp8,0,0.13994133472442627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,128,0,1,float16,float16,0,0.12591999769210815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,128,0,1,float16,fp8,0,0.12533866365750632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,1,128,0,1,fp8,fp8,0,0.11485866705576579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,128,0,1,float16,float16,0,0.12589333454767862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,128,0,1,float16,fp8,0,0.1260479986667633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,2,128,0,1,fp8,fp8,0,0.11955199639002483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,128,0,1,float16,float16,0,0.12658133109410605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,128,0,1,float16,fp8,0,0.12599999705950418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,4,128,0,1,fp8,fp8,0,0.12178666392962138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,128,0,1,float16,float16,0,0.1300106644630432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,128,0,1,float16,fp8,0,0.12800533572832742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,24,8,128,0,1,fp8,fp8,0,0.12822399536768594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,128,0,1,float16,float16,0,0.07747733096281688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,128,0,1,float16,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,24,128,0,1,fp8,fp8,0,0.08111999928951263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,128,0,1,float16,float16,0,0.07022933165232341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,128,0,1,float16,fp8,0,0.0703306645154953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,1,128,0,1,fp8,fp8,0,0.06425599753856659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,128,0,1,float16,float16,0,0.0705973356962204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,128,0,1,float16,fp8,0,0.0705973356962204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,2,128,0,1,fp8,fp8,0,0.064410666624705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,128,0,1,float16,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,128,0,1,fp8,fp8,0,0.06425066788991292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,4,128,0,1,float16,float16,0,0.0721973329782486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,128,0,1,float16,float16,0,0.07177599767843883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,128,0,1,float16,fp8,0,0.07056533296902974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,24,8,128,0,1,fp8,fp8,0,0.06658666829268138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,128,0,1,float16,float16,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,128,0,1,float16,fp8,0,0.04548799991607666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,24,128,0,1,fp8,fp8,0,0.04206933577855428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,128,0,1,float16,float16,0,0.043391997615496315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,128,0,1,fp8,fp8,0,0.03995733211437861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,1,128,0,1,float16,fp8,0,0.043840001026789345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,128,0,1,float16,float16,0,0.04233066737651825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,128,0,1,float16,fp8,0,0.04394133388996124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,2,128,0,1,fp8,fp8,0,0.03951466580231985
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,128,0,1,float16,float16,0,0.04428799947102865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,128,0,1,float16,fp8,0,0.044010668992996216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,4,128,0,1,fp8,fp8,0,0.03994133323431015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,128,0,1,float16,float16,0,0.04393066465854645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,128,0,1,float16,fp8,0,0.04448533554871877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,24,8,128,0,1,fp8,fp8,0,0.04151466737190882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,128,0,1,float16,float16,0,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,128,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,24,128,0,1,fp8,fp8,0,0.028890666862328846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,128,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,1,128,0,1,fp8,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,128,0,1,float16,float16,0,0.02978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,2,128,0,1,fp8,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,128,0,1,float16,float16,0,0.029135999580224354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,128,0,1,float16,fp8,0,0.030218665798505146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,128,0,1,float16,float16,0,0.029418667157491047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,4,128,0,1,fp8,fp8,0,0.02992533395687739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,128,0,1,float16,fp8,0,0.029631999631722767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,24,8,128,0,1,fp8,fp8,0,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,128,0,1,float16,float16,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,128,0,1,float16,fp8,0,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,24,128,0,1,fp8,fp8,0,0.02359466751416524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,128,0,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,128,0,1,float16,fp8,0,0.022815999885400135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,1,128,0,1,fp8,fp8,0,0.02147199958562851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,128,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,128,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,2,128,0,1,fp8,fp8,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,4,128,0,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,128,0,1,float16,float16,0,0.023247999449570973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,128,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,24,8,128,0,1,fp8,fp8,0,0.02378133436044057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,128,0,1,float16,float16,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,24,128,0,1,fp8,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,128,0,1,float16,float16,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,1,128,0,1,fp8,fp8,0,0.01629866659641266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,2,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,4,128,0,1,fp8,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,128,0,1,float16,float16,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,24,8,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,128,0,1,float16,float16,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,24,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,1,128,0,1,fp8,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,2,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,4,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,24,8,128,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,24,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,128,0,1,float16,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,1,128,0,1,fp8,fp8,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,2,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,128,0,1,float16,fp8,0,0.0161013330022494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,4,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,128,0,1,float16,fp8,0,0.016224000602960587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,24,8,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,128,0,1,fp8,fp8,0,0.15915200114250183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,128,0,1,float16,fp8,0,0.17115734020868936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,1,128,0,1,float16,float16,0,0.17108800013860068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,128,0,1,float16,float16,0,0.17011733849843344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,128,0,1,float16,fp8,0,0.17108800013860068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,2,128,0,1,fp8,fp8,0,0.16274666786193848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,128,0,1,float16,float16,0,0.17108800013860068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,128,0,1,float16,fp8,0,0.17082132895787558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,4,128,0,1,fp8,fp8,0,0.16540799538294473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,128,0,1,float16,float16,0,0.1751306653022766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,128,0,1,float16,fp8,0,0.17498666048049927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,24,8,128,0,1,fp8,fp8,0,0.16979199647903442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,128,0,1,float16,float16,0,0.10130666693051656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,128,0,1,float16,fp8,0,0.09929600358009338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,24,128,0,1,fp8,fp8,0,0.10155733426411946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,128,0,1,float16,float16,0,0.09197333455085754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,128,0,1,float16,fp8,0,0.09196799993515015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,1,128,0,1,fp8,fp8,0,0.08497066299120586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,128,0,1,float16,float16,0,0.09257066249847412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,128,0,1,float16,fp8,0,0.09224533041318257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,2,128,0,1,fp8,fp8,0,0.08667733271916707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,128,0,1,float16,float16,0,0.09311466415723164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,128,0,1,float16,fp8,0,0.09316800038019817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,4,128,0,1,fp8,fp8,0,0.0867680013179779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,128,0,1,float16,fp8,0,0.09327999750773112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,128,0,1,float16,float16,0,0.05605866511662801
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,128,0,1,float16,float16,0,0.09556800127029419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,24,8,128,0,1,fp8,fp8,0,0.08873599767684937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,128,0,1,float16,fp8,0,0.05667733152707418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,24,128,0,1,fp8,fp8,0,0.05423999826113383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,128,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,128,0,1,fp8,fp8,0,0.05175999800364176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,1,128,0,1,float16,float16,0,0.055349335074424744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,128,0,1,float16,float16,0,0.05407999952634176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,128,0,1,float16,fp8,0,0.05420800050099691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,2,128,0,1,fp8,fp8,0,0.05193600058555603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,128,0,1,float16,float16,0,0.05464000006516775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,128,0,1,float16,fp8,0,0.05585066477457682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,128,0,1,float16,float16,0,0.05625600119431814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,4,128,0,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,128,0,1,float16,fp8,0,0.05412266651789347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,24,8,128,0,1,fp8,fp8,0,0.052383999029795326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,128,0,1,float16,float16,0,0.035391998787721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,128,0,1,float16,fp8,0,0.035402665535608925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,24,128,0,1,fp8,fp8,0,0.03392533212900162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,128,0,1,float16,float16,0,0.03594133257865906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,128,0,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,1,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,128,0,1,float16,float16,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,128,0,1,float16,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,2,128,0,1,fp8,fp8,0,0.03366933266321818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,128,0,1,float16,float16,0,0.035461333890755974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,128,0,1,float16,fp8,0,0.03576533248027166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,4,128,0,1,fp8,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,128,0,1,float16,float16,0,0.03659733384847641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,128,0,1,float16,fp8,0,0.03571200122435888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,128,0,1,float16,float16,0,0.025600001215934753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,128,0,1,float16,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,24,8,128,0,1,fp8,fp8,0,0.0352960005402565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,24,128,0,1,fp8,fp8,0,0.02521066615978877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,1,128,0,1,fp8,fp8,0,0.023386667172114056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,128,0,1,float16,float16,0,0.025498665869235992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,128,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,2,128,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,128,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,128,0,1,float16,float16,0,0.025087999800841015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,4,128,0,1,fp8,fp8,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,128,0,1,float16,float16,0,0.026533332963784535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,128,0,1,float16,fp8,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,24,8,128,0,1,fp8,fp8,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,128,0,1,float16,float16,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,24,128,0,1,fp8,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,128,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,1,128,0,1,fp8,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,2,128,0,1,fp8,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,128,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,4,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,128,0,1,float16,float16,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,128,0,1,float16,fp8,0,0.019578666736682255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,24,8,128,0,1,fp8,fp8,0,0.018992000569899876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,128,0,1,float16,float16,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,24,128,0,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,1,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,2,128,0,1,fp8,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,24,8,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,24,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,128,0,1,float16,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,1,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,128,0,1,float16,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,2,128,0,1,fp8,fp8,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,4,128,0,1,fp8,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,24,8,128,0,1,fp8,fp8,0,0.016336000214020412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,128,0,1,float16,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,24,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,128,0,1,float16,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,1,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,4,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,128,0,1,float16,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,24,8,128,0,1,fp8,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,128,0,1,float16,float16,0,0.13885866602261862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,128,0,1,float16,fp8,0,0.14045332868893942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,1,128,0,1,fp8,fp8,0,0.13181333740552267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,128,0,1,float16,float16,0,0.13885333140691122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,128,0,1,float16,fp8,0,0.1402239998181661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,128,0,1,float16,float16,0,0.14039466778437296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,2,128,0,1,fp8,fp8,0,0.1322879989941915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,128,0,1,float16,fp8,0,0.13920000195503235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,4,128,0,1,fp8,fp8,0,0.13196266690889993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,128,0,1,float16,float16,0,0.14220266540845236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,128,0,1,float16,fp8,0,0.14038399855295816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,128,0,1,float16,float16,0,0.0802346666653951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,24,8,128,0,1,fp8,fp8,0,0.13590400417645773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,128,0,1,float16,fp8,0,0.07879466811815898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,24,128,0,1,fp8,fp8,0,0.07790933549404144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,128,0,1,float16,float16,0,0.07787199815114339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,128,0,1,float16,fp8,0,0.07667733232180278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,1,128,0,1,fp8,fp8,0,0.07258133093516032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,128,0,1,float16,float16,0,0.07860266665617625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,128,0,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,2,128,0,1,fp8,fp8,0,0.07294400036334991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,128,0,1,float16,fp8,0,0.07866666714350383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,128,0,1,float16,float16,0,0.07858666777610779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,4,128,0,1,fp8,fp8,0,0.07292266686757405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,128,0,1,float16,float16,0,0.0767626663049062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,128,0,1,float16,fp8,0,0.07728533446788788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,24,8,128,0,1,fp8,fp8,0,0.07400533556938171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,128,0,1,float16,float16,0,0.04645333190759023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,128,0,1,float16,fp8,0,0.04769066472848257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,24,128,0,1,fp8,fp8,0,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,128,0,1,float16,fp8,0,0.046207999189694725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,128,0,1,float16,float16,0,0.04613333443800608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,1,128,0,1,fp8,fp8,0,0.044405331214269005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,128,0,1,float16,float16,0,0.04594666759173075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,128,0,1,float16,fp8,0,0.046122665206591286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,2,128,0,1,fp8,fp8,0,0.04398933549722036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,128,0,1,float16,float16,0,0.04784533381462097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,128,0,1,float16,fp8,0,0.04614399870236715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,4,128,0,1,fp8,fp8,0,0.04557333389918009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,128,0,1,float16,float16,0,0.047685335079828896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,128,0,1,float16,fp8,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,24,8,128,0,1,fp8,fp8,0,0.045781334241231285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,128,0,1,float16,float16,0,0.033514666060606636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,128,0,1,float16,fp8,0,0.03126933425664902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,24,128,0,1,fp8,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,128,0,1,float16,float16,0,0.03162133445342382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,128,0,1,float16,fp8,0,0.030879999200503033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,1,128,0,1,fp8,fp8,0,0.031104000906149547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,128,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,128,0,1,float16,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,2,128,0,1,fp8,fp8,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,128,0,1,float16,float16,0,0.03164266546567281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,128,0,1,float16,fp8,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,4,128,0,1,fp8,fp8,0,0.030896000564098358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,128,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,128,0,1,float16,fp8,0,0.03148266673088074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,128,0,1,float16,float16,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,128,0,1,float16,fp8,0,0.023103999594847362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,24,8,128,0,1,fp8,fp8,0,0.031221332649389904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,24,128,0,1,fp8,fp8,0,0.022085333863894146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,128,0,1,float16,float16,0,0.021045332153638203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,1,128,0,1,fp8,fp8,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,2,128,0,1,fp8,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,128,0,1,float16,fp8,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,4,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,128,0,1,float16,float16,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,128,0,1,float16,fp8,0,0.023050665855407715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,24,8,128,0,1,fp8,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,128,0,1,float16,float16,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,128,0,1,float16,fp8,0,0.019413333386182785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,24,128,0,1,fp8,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,128,0,1,float16,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,1,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,128,0,1,float16,float16,0,0.018394666413466137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,128,0,1,float16,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,2,128,0,1,fp8,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,128,0,1,float16,float16,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,4,128,0,1,fp8,fp8,0,0.017664000391960144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,24,8,128,0,1,fp8,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,128,0,1,float16,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,24,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,128,0,1,float16,float16,0,0.01766933376590411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,1,128,0,1,fp8,fp8,0,0.016693333784739178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,128,0,1,float16,float16,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,2,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,128,0,1,float16,float16,0,0.01672533278663953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,128,0,1,float16,fp8,0,0.015872000406185787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,4,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,24,8,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,128,0,1,float16,fp8,0,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,24,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,128,0,1,float16,float16,0,0.015573333948850632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,1,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,2,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,128,0,1,float16,float16,0,0.01482133318980535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,128,0,1,float16,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,4,128,0,1,fp8,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,128,0,1,float16,fp8,0,0.016773333152135212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,24,8,128,0,1,fp8,fp8,0,0.01747200017174085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,24,128,0,1,fp8,fp8,0,0.01664000004529953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,1,128,0,1,fp8,fp8,0,0.015429332852363586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,2,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,128,0,1,float16,float16,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,4,128,0,1,fp8,fp8,0,0.015925332903862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,24,8,128,0,1,fp8,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,128,0,1,float16,float16,0,0.1184213360150655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,128,0,1,float16,fp8,0,0.11956800023714702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,1,128,0,1,fp8,fp8,0,0.10794666409492493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,128,0,1,float16,fp8,0,0.11989333232243855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,128,0,1,float16,float16,0,0.11779733498891194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,2,128,0,1,fp8,fp8,0,0.10773332913716634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,128,0,1,float16,float16,0,0.11762666702270508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,128,0,1,float16,fp8,0,0.11781332890192668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,4,128,0,1,fp8,fp8,0,0.10763733585675557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,128,0,1,float16,float16,0,0.11805333693822224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,128,0,1,float16,fp8,0,0.11780266960461934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,24,8,128,0,1,fp8,fp8,0,0.10771733522415161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,0,0.06660800178845723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,0,0.06660800178845723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,24,128,0,1,fp8,fp8,0,0.06208533545335134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,128,0,1,float16,float16,0,0.06678399940331776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,128,0,1,float16,fp8,0,0.06614933411280315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,1,128,0,1,fp8,fp8,0,0.062362665931383766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,128,0,1,float16,float16,0,0.06690666576226552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,128,0,1,float16,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,2,128,0,1,fp8,fp8,0,0.060746664802233376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,128,0,1,float16,float16,0,0.06682666639486949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,128,0,1,float16,fp8,0,0.06663466493288676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,4,128,0,1,fp8,fp8,0,0.06081599990526835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,128,0,1,float16,float16,0,0.06666666766007741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,128,0,1,float16,fp8,0,0.06621866424878438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,24,8,128,0,1,fp8,fp8,0,0.061994666854540505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,0,0.04030400017897288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,128,0,1,fp8,fp8,0,0.03955200066169103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,0,0.04171733558177948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,128,0,1,float16,fp8,0,0.04196799794832865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,128,0,1,fp8,fp8,0,0.037477334340413414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,1,128,0,1,float16,float16,0,0.04077333211898804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,128,0,1,float16,float16,0,0.04172799984614054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,128,0,1,float16,fp8,0,0.041509332756201424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,2,128,0,1,fp8,fp8,0,0.03779733429352442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,128,0,1,fp8,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,4,128,0,1,float16,fp8,0,0.04123199979464213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,128,0,1,float16,float16,0,0.040448000033696495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,128,0,1,float16,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,24,8,128,0,1,fp8,fp8,0,0.037632000943024956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,0,0.02938133229811986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,24,128,0,1,fp8,fp8,0,0.027232001225153606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,128,0,1,float16,float16,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,128,0,1,float16,fp8,0,0.02903466671705246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,1,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,128,0,1,float16,float16,0,0.02940800040960312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,128,0,1,float16,fp8,0,0.02784000088771184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,2,128,0,1,fp8,fp8,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,128,0,1,float16,float16,0,0.02945599953333537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,128,0,1,fp8,fp8,0,0.027098665634791057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,4,128,0,1,float16,fp8,0,0.03028800090154012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,128,0,1,float16,float16,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,128,0,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,24,8,128,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,0,0.02197866638501485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,128,0,1,float16,fp8,0,0.02309333284695943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,1,128,0,1,fp8,fp8,0,0.02082666630546252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,128,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,128,0,1,float16,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,128,0,1,float16,float16,0,0.025402667621771496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,2,128,0,1,fp8,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,128,0,1,float16,fp8,0,0.023946667710940044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,4,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,128,0,1,float16,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,128,0,1,float16,float16,0,0.022917332748572033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,0,0.018725333114465077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,24,128,0,1,fp8,fp8,0,0.017685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,24,8,128,0,1,fp8,fp8,0,0.021589333812395733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,128,0,1,float16,float16,0,0.019109333554903667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,1,128,0,1,fp8,fp8,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,128,0,1,float16,float16,0,0.019909333437681198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,2,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,128,0,1,float16,fp8,0,0.019440000255902607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,4,128,0,1,fp8,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,128,0,1,float16,float16,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,128,0,1,float16,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,24,8,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,24,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,128,0,1,float16,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,128,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,1,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,2,128,0,1,fp8,fp8,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,128,0,1,float16,fp8,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,4,128,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,128,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,24,8,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,24,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,1,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,128,0,1,float16,float16,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,2,128,0,1,fp8,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,4,128,0,1,fp8,fp8,0,0.016757333030303318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,24,8,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,24,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,128,0,1,float16,float16,0,0.014639999717473984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,1,128,0,1,fp8,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,128,0,1,float16,float16,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,4,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,128,0,1,float16,fp8,0,0.015461333096027374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,24,8,128,0,1,fp8,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,128,0,1,fp8,fp8,0,4.410666783650716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,fp8,0,5.554202397664388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,1,128,0,1,float16,float16,0,5.5882829030354815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,float16,0,5.851072311401367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,128,0,1,float16,fp8,0,5.72049077351888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,2,128,0,1,fp8,fp8,0,4.428784052530925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,float16,0,5.9243520100911455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,128,0,1,float16,fp8,0,5.858650843302409
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,4,128,0,1,fp8,fp8,0,4.478277206420898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,float16,0,5.662192026774089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,128,0,1,float16,fp8,0,5.956511815388997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,float16,0,2.8697598775227866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,128,0,1,float16,fp8,0,2.9733546574910483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,16,8,128,0,1,fp8,fp8,0,4.543530782063802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,16,128,0,1,fp8,fp8,0,2.3877545992533364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,float16,0,2.826613426208496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,128,0,1,fp8,fp8,0,2.281973361968994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,1,128,0,1,float16,fp8,0,2.8600587844848633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,float16,0,2.88043212890625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,128,0,1,float16,fp8,0,2.8012641270955405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,2,128,0,1,fp8,fp8,0,2.305786609649658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,float16,0,2.804719924926758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,128,0,1,float16,fp8,0,2.9506985346476235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,4,128,0,1,fp8,fp8,0,2.3038613001505532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,float16,0,2.86515744527181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,128,0,1,float16,fp8,0,2.841456095377604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,float16,0,1.5228160222371419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,16,8,128,0,1,fp8,fp8,0,2.3453280131022134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,128,0,1,float16,fp8,0,1.5598559379577637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,16,128,0,1,fp8,fp8,0,1.411824067433675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,float16,0,1.4579787254333496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,128,0,1,float16,fp8,0,1.462538719177246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,1,128,0,1,fp8,fp8,0,1.220421314239502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,float16,0,1.4545547167460124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,128,0,1,fp8,fp8,0,1.289749304453532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,2,128,0,1,float16,fp8,0,1.5004266103108723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,float16,0,1.4775627454121907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,128,0,1,fp8,fp8,0,1.242965300877889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,4,128,0,1,float16,fp8,0,1.4856319427490234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,float16,0,1.490106741587321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,128,0,1,float16,fp8,0,1.5075573921203613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,float16,0,0.8422133127848307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,16,8,128,0,1,fp8,fp8,0,1.255120038986206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,128,0,1,float16,fp8,0,0.8645280202229818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,16,128,0,1,fp8,fp8,0,0.7296000321706136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,float16,0,0.8028053442637125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,128,0,1,fp8,fp8,0,0.7546026706695557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,1,128,0,1,float16,fp8,0,0.8172106742858887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,float16,0,0.8189333279927572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,128,0,1,float16,fp8,0,0.8142346541086832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,2,128,0,1,fp8,fp8,0,0.7043093045552572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,float16,0,0.8281973203023275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,128,0,1,float16,fp8,0,0.8224213123321533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,4,128,0,1,fp8,fp8,0,0.7077653408050537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,float16,0,0.8391573429107666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,128,0,1,fp8,fp8,0,0.7157653172810873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,16,8,128,0,1,float16,fp8,0,0.8319946924845377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,128,0,1,fp8,fp8,0,2.668575922648112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,float16,0,3.3305066426595054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,1,128,0,1,float16,fp8,0,3.258901278177897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,float16,0,3.268218676249186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,128,0,1,fp8,fp8,0,2.694197336832682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,2,128,0,1,float16,fp8,0,3.309823989868164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,float16,0,3.3686720530192056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,128,0,1,fp8,fp8,0,2.7094081242879233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,4,128,0,1,float16,fp8,0,3.3375679651896157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,float16,0,3.424480120340983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,128,0,1,float16,fp8,0,3.4124425252278647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,float16,0,1.7908533414204915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,16,8,128,0,1,fp8,fp8,0,2.760634740193685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,128,0,1,float16,fp8,0,1.7932693163553874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,16,128,0,1,fp8,fp8,0,1.475813388824463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,float16,0,1.646399974822998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,128,0,1,fp8,fp8,0,1.3994506200154622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,1,128,0,1,float16,fp8,0,1.673210620880127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,float16,0,1.6683039665222168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,128,0,1,fp8,fp8,0,1.4018665949503581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,2,128,0,1,float16,fp8,0,1.6774080594380696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,float16,0,1.697098731994629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,128,0,1,float16,fp8,0,1.7075200080871582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,4,128,0,1,fp8,fp8,0,1.4145174026489258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,float16,0,1.7418293952941895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,128,0,1,float16,fp8,0,1.753328005472819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,float16,0,0.9555040200551351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,128,0,1,float16,fp8,0,0.9848480224609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,16,8,128,0,1,fp8,fp8,0,1.52620267868042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,16,128,0,1,fp8,fp8,0,0.8553493022918701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,float16,0,0.8962399959564209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,128,0,1,fp8,fp8,0,0.7670453389485677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,1,128,0,1,float16,fp8,0,0.8907039960225424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,float16,0,0.9021226565043131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,128,0,1,float16,fp8,0,0.9007360140482584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,2,128,0,1,fp8,fp8,0,0.7687946955362955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,float16,0,0.8990826606750488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,128,0,1,float16,fp8,0,0.9167946974436442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,4,128,0,1,fp8,fp8,0,0.7758080164591471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,float16,0,0.9224960009256998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,128,0,1,float16,fp8,0,0.9246666431427002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,16,8,128,0,1,fp8,fp8,0,0.78765869140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,float16,0,0.5480266809463501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,128,0,1,float16,fp8,0,0.5528853336970011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,16,128,0,1,fp8,fp8,0,0.4922186533610026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,float16,0,0.5077653328577677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,128,0,1,float16,fp8,0,0.5085759957631429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,1,128,0,1,fp8,fp8,0,0.45027732849121094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,float16,0,0.5178133249282837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,128,0,1,float16,fp8,0,0.5108213424682617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,2,128,0,1,fp8,fp8,0,0.45338666439056396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,float16,0,0.5219626824061075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,128,0,1,float16,fp8,0,0.5176373322804769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,4,128,0,1,fp8,fp8,0,0.4566933314005534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,float16,0,0.5320853392283121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,128,0,1,float16,fp8,0,0.5352053244908651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,16,8,128,0,1,fp8,fp8,0,0.46169598897298175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,float16,0,2.3429439862569175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,128,0,1,float16,fp8,0,2.3592000007629395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,1,128,0,1,fp8,fp8,0,1.9641013145446777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,float16,0,2.32148806254069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,128,0,1,fp8,fp8,0,1.9656000137329102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,2,128,0,1,float16,fp8,0,2.3621652921040854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,float16,0,2.395813306172689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,128,0,1,float16,fp8,0,2.4189066886901855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,4,128,0,1,fp8,fp8,0,1.981013298034668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,float16,0,2.456655979156494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,128,0,1,float16,fp8,0,2.4274239540100098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,16,8,128,0,1,fp8,fp8,0,2.028970718383789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,float16,0,1.3136160373687744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,128,0,1,float16,fp8,0,1.3583359718322754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,16,128,0,1,fp8,fp8,0,1.1224533716837566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,float16,0,1.2094133694966633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,128,0,1,float16,fp8,0,1.2314080397288005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,1,128,0,1,fp8,fp8,0,1.0393866697947185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,float16,0,1.2276000181833904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,128,0,1,float16,fp8,0,1.2382986545562744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,2,128,0,1,fp8,fp8,0,1.03765869140625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,float16,0,1.252282698949178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,128,0,1,fp8,fp8,0,1.0466559727986653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,4,128,0,1,float16,fp8,0,1.2457386652628581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,float16,0,1.2614346345265706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,float16,0,0.7182133197784424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,128,0,1,float16,fp8,0,1.286181370417277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,16,8,128,0,1,fp8,fp8,0,1.0645066897074382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,128,0,1,float16,fp8,0,0.7289333343505859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,16,128,0,1,fp8,fp8,0,0.60971732934316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,float16,0,0.6623573303222656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,128,0,1,float16,fp8,0,0.6563253402709961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,1,128,0,1,fp8,fp8,0,0.5809866587320963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,float16,0,0.6695146560668945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,128,0,1,float16,fp8,0,0.6739146709442139
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,2,128,0,1,fp8,fp8,0,0.5750240087509155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,float16,0,0.6799093087514242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,128,0,1,float16,fp8,0,0.6793813705444336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,4,128,0,1,fp8,fp8,0,0.5816426674524943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,float16,0,0.692570686340332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,128,0,1,fp8,fp8,0,0.592960000038147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,16,8,128,0,1,float16,fp8,0,0.6973600387573242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,float16,0,0.4227306842803955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,128,0,1,fp8,fp8,0,0.3654880126317342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,16,128,0,1,float16,fp8,0,0.42395734786987305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,128,0,1,fp8,fp8,0,0.33908267815907794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,fp8,0,0.38494400183359784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,1,128,0,1,float16,float16,0,0.3811306556065877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,128,0,1,fp8,fp8,0,0.34402668476104736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,fp8,0,0.3837653398513794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,2,128,0,1,float16,float16,0,0.3795520067214966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,128,0,1,fp8,fp8,0,0.3479146560033162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,fp8,0,0.39044801394144696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,4,128,0,1,float16,float16,0,0.39188798268636066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,128,0,1,fp8,fp8,0,0.3555519978205363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,fp8,0,0.4073653221130371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,16,8,128,0,1,float16,float16,0,0.4040373166402181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,128,0,1,fp8,fp8,0,2.6352480252583823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,float16,0,3.1079467137654624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,1,128,0,1,float16,fp8,0,3.1791359583536782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,float16,0,3.2212371826171875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,128,0,1,float16,fp8,0,3.159557342529297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,2,128,0,1,fp8,fp8,0,2.65555731455485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,float16,0,3.235408147176107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,128,0,1,float16,fp8,0,3.2299680709838867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,4,128,0,1,fp8,fp8,0,2.6994508107503257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,float16,0,3.2964159647623696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,128,0,1,float16,fp8,0,3.334613482157389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,float16,0,1.7328853607177734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,128,0,1,float16,fp8,0,1.7918559710184734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,16,128,0,1,fp8,fp8,0,1.4620639483133953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,16,8,128,0,1,fp8,fp8,0,2.732026735941569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,float16,0,1.6088320414225261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,128,0,1,fp8,fp8,0,1.354325294494629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,1,128,0,1,float16,fp8,0,1.6144320170084636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,float16,0,1.5951627095540364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,128,0,1,float16,fp8,0,1.7242186864217122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,2,128,0,1,fp8,fp8,0,1.361738681793213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,float16,0,1.629818598429362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,128,0,1,float16,fp8,0,1.6336746215820312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,4,128,0,1,fp8,fp8,0,1.4852693875630696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,float16,0,1.6637120246887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,128,0,1,float16,fp8,0,1.6928265889485676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,16,8,128,0,1,fp8,fp8,0,1.4294026692708333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,float16,0,0.9157386620839437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,128,0,1,float16,fp8,0,0.9339199860890707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,16,128,0,1,fp8,fp8,0,0.8023733297983805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,float16,0,0.8360373179117838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,128,0,1,float16,fp8,0,0.8335466384887695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,1,128,0,1,fp8,fp8,0,0.7244693438212076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,float16,0,0.8509973684946696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,128,0,1,float16,fp8,0,0.8509706656138102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,2,128,0,1,fp8,fp8,0,0.7258559862772623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,float16,0,0.8626293341318766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,128,0,1,float16,fp8,0,0.8658666610717773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,4,128,0,1,fp8,fp8,0,0.7346186637878418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,float16,0,0.870405356089274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,128,0,1,float16,fp8,0,0.88864533106486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,float16,0,0.5114719867706299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,128,0,1,float16,fp8,0,0.5165599981943766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,16,8,128,0,1,fp8,fp8,0,0.7506133715311686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,16,128,0,1,fp8,fp8,0,0.43936534722646076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,float16,0,0.45999467372894287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,128,0,1,float16,fp8,0,0.4562613169352214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,1,128,0,1,fp8,fp8,0,0.4106026490529378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,float16,0,0.4684906800587972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,128,0,1,float16,fp8,0,0.4667946497599284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,2,128,0,1,fp8,fp8,0,0.4113173484802246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,float16,0,0.47621333599090576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,128,0,1,float16,fp8,0,0.47996799151102704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,4,128,0,1,fp8,fp8,0,0.41594131787618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,float16,0,0.48635200659434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,128,0,1,float16,fp8,0,0.4933653275171916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,16,8,128,0,1,fp8,fp8,0,0.42370132605234784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,float16,0,0.30803734064102173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,128,0,1,float16,fp8,0,0.30931733051935834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,16,128,0,1,fp8,fp8,0,0.26769065856933594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,float16,0,0.2763040065765381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,128,0,1,float16,fp8,0,0.2746880054473877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,1,128,0,1,fp8,fp8,0,0.2466826637585958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,float16,0,0.27510400613149005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,128,0,1,float16,fp8,0,0.27799999713897705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,2,128,0,1,fp8,fp8,0,0.2489173412322998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,float16,0,0.275434672832489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,128,0,1,float16,fp8,0,0.2810399929682414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,4,128,0,1,fp8,fp8,0,0.254314661026001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,float16,0,0.2874293327331543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,128,0,1,float16,fp8,0,0.2857866684595744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,16,8,128,0,1,fp8,fp8,0,0.25963733593622845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,float16,0,1.929898738861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,128,0,1,fp8,fp8,0,1.6448267300923665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,1,128,0,1,float16,fp8,0,1.9345280329386394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,float16,0,1.9379626909891765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,128,0,1,fp8,fp8,0,1.6592000325520833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,2,128,0,1,float16,fp8,0,1.9634666442871094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,float16,0,1.9725546836853027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,128,0,1,fp8,fp8,0,1.6808373133341472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,4,128,0,1,float16,fp8,0,1.9613332748413086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,float16,0,2.034880002339681
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,float16,0,1.1088746388753254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,128,0,1,fp8,fp8,0,1.7282133102416992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,16,8,128,0,1,float16,fp8,0,2.034554640452067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,128,0,1,float16,fp8,0,1.1220906575520833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,16,128,0,1,fp8,fp8,0,0.9401280085245768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,float16,0,0.9941653410593668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,128,0,1,float16,fp8,0,0.9941173394521078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,1,128,0,1,fp8,fp8,0,0.8576959768931071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,float16,0,0.9921226501464844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,128,0,1,float16,fp8,0,1.003498633702596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,2,128,0,1,fp8,fp8,0,0.8615039984385172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,float16,0,1.0150240262349446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,128,0,1,fp8,fp8,0,0.8713119824727377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,4,128,0,1,float16,fp8,0,1.0132213433583577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,float16,0,1.04530668258667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,float16,0,0.5943413178126017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,128,0,1,fp8,fp8,0,0.8964320023854574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,16,8,128,0,1,float16,fp8,0,1.059605360031128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,128,0,1,float16,fp8,0,0.6008586486180624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,16,128,0,1,fp8,fp8,0,0.507967988650004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,fp8,0,0.533461332321167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,128,0,1,float16,float16,0,0.5333173274993896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,1,128,0,1,fp8,fp8,0,0.4659573237101237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,float16,0,0.5317546526590983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,128,0,1,fp8,fp8,0,0.46905601024627686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,2,128,0,1,float16,fp8,0,0.5451786518096924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,float16,0,0.5455413262049357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,128,0,1,float16,fp8,0,0.5424373149871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,4,128,0,1,fp8,fp8,0,0.47514132658640545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,float16,0,0.562122662862142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,128,0,1,float16,fp8,0,0.5657546520233154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,16,8,128,0,1,fp8,fp8,0,0.48576533794403076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,float16,0,0.3392159938812256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,128,0,1,fp8,fp8,0,0.29427733023961383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,16,128,0,1,float16,fp8,0,0.3434720039367676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,float16,0,0.29662400484085083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,128,0,1,float16,fp8,0,0.2958986759185791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,1,128,0,1,fp8,fp8,0,0.26921067635218304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,float16,0,0.29985066254933673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,128,0,1,float16,fp8,0,0.2995520035425822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,2,128,0,1,fp8,fp8,0,0.27154133717219037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,float16,0,0.30694933732350665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,128,0,1,float16,fp8,0,0.30669333537419635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,4,128,0,1,fp8,fp8,0,0.2765973409016927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,float16,0,0.31828800837198895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,128,0,1,float16,fp8,0,0.32170132795969647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,16,8,128,0,1,fp8,fp8,0,0.2823200027147929
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,float16,0,0.20795732736587524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,128,0,1,float16,fp8,0,0.20940266052881876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,16,128,0,1,fp8,fp8,0,0.18362667163213095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,float16,0,0.18387200435002646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,128,0,1,float16,fp8,0,0.18461867173512778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,1,128,0,1,fp8,fp8,0,0.1699893275896708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,float16,0,0.18383467197418213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,128,0,1,float16,fp8,0,0.18475733200709024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,2,128,0,1,fp8,fp8,0,0.168938676516215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,float16,0,0.18499199549357095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,128,0,1,fp8,fp8,0,0.17068266868591309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,4,128,0,1,float16,fp8,0,0.188917338848114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,float16,0,0.1871359944343567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,128,0,1,float16,fp8,0,0.18826667467753092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,16,8,128,0,1,fp8,fp8,0,0.17638933658599854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,float16,0,1.9898239771525066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,128,0,1,fp8,fp8,0,1.7343947092692058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,1,128,0,1,float16,fp8,0,1.981178601582845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,float16,0,2.0123626391092935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,128,0,1,fp8,fp8,0,1.7512000401814778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,2,128,0,1,float16,fp8,0,2.0271946589152017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,float16,0,2.0383307139078775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,128,0,1,float16,fp8,0,2.059552033742269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,4,128,0,1,fp8,fp8,0,1.7721494038899739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,float16,0,2.127354621887207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,128,0,1,fp8,fp8,0,1.8398346900939941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,16,8,128,0,1,float16,fp8,0,2.1310240427652993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,float16,0,1.158677339553833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,128,0,1,fp8,fp8,0,0.9964319864908854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,16,128,0,1,float16,fp8,0,1.1744906902313232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,float16,0,1.0030293464660645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,128,0,1,float16,fp8,0,1.0119199752807617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,1,128,0,1,fp8,fp8,0,0.8833013375600179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,float16,0,1.0247306823730469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,128,0,1,float16,fp8,0,1.0187573432922363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,2,128,0,1,fp8,fp8,0,0.8888266881306967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,float16,0,1.0404319763183594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,128,0,1,fp8,fp8,0,0.9017493724822998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,4,128,0,1,float16,fp8,0,1.041157325108846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,float16,0,1.080405314763387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,float16,0,0.6072746515274048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,128,0,1,fp8,fp8,0,0.9340426921844482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,16,8,128,0,1,float16,fp8,0,1.09224534034729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,128,0,1,float16,fp8,0,0.616213321685791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,16,128,0,1,fp8,fp8,0,0.5269440015157064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,fp8,0,0.5332053502400717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,128,0,1,float16,float16,0,0.5292533238728842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,1,128,0,1,fp8,fp8,0,0.46981334686279297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,float16,0,0.5353493293126425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,128,0,1,float16,fp8,0,0.5398133198420206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,2,128,0,1,fp8,fp8,0,0.47248534361521405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,float16,0,0.5438026587168375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,128,0,1,float16,fp8,0,0.551146666208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,4,128,0,1,fp8,fp8,0,0.47972798347473145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,float16,0,0.5684853394826254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,128,0,1,float16,fp8,0,0.5750666856765747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,16,8,128,0,1,fp8,fp8,0,0.4964373509089152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,float16,0,0.33311466375986737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,128,0,1,float16,fp8,0,0.339413324991862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,16,128,0,1,fp8,fp8,0,0.29026132822036743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,float16,0,0.28834666808446247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,128,0,1,float16,fp8,0,0.2890239953994751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,1,128,0,1,fp8,fp8,0,0.2614133358001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,float16,0,0.29230932394663495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,128,0,1,float16,fp8,0,0.29306666056315106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,2,128,0,1,fp8,fp8,0,0.2644853393236796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,float16,0,0.29949333270390827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,128,0,1,float16,fp8,0,0.3025173346201579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,4,128,0,1,fp8,fp8,0,0.2677599986394246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,float16,0,0.31363733609517414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,128,0,1,float16,fp8,0,0.3164213299751282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,16,8,128,0,1,fp8,fp8,0,0.2768213351567586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,float16,0,0.19476266702016196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,128,0,1,float16,fp8,0,0.19790933529535928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,16,128,0,1,fp8,fp8,0,0.17353065808614096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,float16,0,0.16597867012023926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,128,0,1,float16,fp8,0,0.16790932416915894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,float16,0,0.168287992477417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,1,128,0,1,fp8,fp8,0,0.1514400045077006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,128,0,1,float16,fp8,0,0.168778657913208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,2,128,0,1,fp8,fp8,0,0.1546026666959127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,float16,0,0.16951467593510947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,128,0,1,float16,fp8,0,0.17101866006851196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,4,128,0,1,fp8,fp8,0,0.15779733657836914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,float16,0,0.1806346575419108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,128,0,1,float16,fp8,0,0.18086934089660645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,16,8,128,0,1,fp8,fp8,0,0.16528000434239706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,float16,0,0.11799466609954834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,128,0,1,float16,fp8,0,0.11984533071517944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,float16,0,0.11392000317573547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,16,128,0,1,fp8,fp8,0,0.11426132917404175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,128,0,1,float16,fp8,0,0.11486400167147319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,1,128,0,1,fp8,fp8,0,0.10681600371996562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,float16,0,0.11351999640464783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,float16,0,0.1153706709543864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,128,0,1,float16,fp8,0,0.114464004834493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,2,128,0,1,fp8,fp8,0,0.10668266812960307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,128,0,1,float16,fp8,0,0.11648533741633098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,4,128,0,1,fp8,fp8,0,0.10620266199111938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,float16,0,0.11426666378974915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,128,0,1,float16,fp8,0,0.1165013313293457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,16,8,128,0,1,fp8,fp8,0,0.1074026624361674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,float16,0,1.285264015197754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,128,0,1,float16,fp8,0,1.2880000273386638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,1,128,0,1,fp8,fp8,0,1.1303040186564128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,float16,0,1.2967039744059246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,128,0,1,fp8,fp8,0,1.1424799760182698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,2,128,0,1,float16,fp8,0,1.3073546886444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,float16,0,1.3187572956085205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,128,0,1,float16,fp8,0,1.3313813209533691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,4,128,0,1,fp8,fp8,0,1.1663520336151123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,float16,0,1.389402707417806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,128,0,1,fp8,fp8,0,1.2139253616333008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,16,8,128,0,1,float16,fp8,0,1.3915786743164062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,float16,0,0.7579893271128336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,128,0,1,float16,fp8,0,0.7689706484476725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,16,128,0,1,fp8,fp8,0,0.6631413300832113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,float16,0,0.6543733278910319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,128,0,1,float16,fp8,0,0.6595360040664673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,1,128,0,1,fp8,fp8,0,0.5830346743265787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,float16,0,0.6670986811319987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,128,0,1,float16,fp8,0,0.665231982866923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,2,128,0,1,fp8,fp8,0,0.5872480074564616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,float16,0,0.6807359854380289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,128,0,1,fp8,fp8,0,0.599951982498169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,4,128,0,1,float16,fp8,0,0.6853973070780436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,float16,0,0.7123680114746094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,float16,0,0.4042346477508545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,128,0,1,float16,fp8,0,0.719866673151652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,16,8,128,0,1,fp8,fp8,0,0.6220000187555949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,128,0,1,float16,fp8,0,0.41097601254781085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,16,128,0,1,fp8,fp8,0,0.3565973440806071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,float16,0,0.3487786849339803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,128,0,1,float16,fp8,0,0.34678932030995685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,1,128,0,1,fp8,fp8,0,0.31516800324122113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,float16,0,0.3531573216120402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,128,0,1,float16,fp8,0,0.3548266490300496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,2,128,0,1,fp8,fp8,0,0.3182506759961446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,float16,0,0.3636853297551473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,128,0,1,float16,fp8,0,0.36659733454386395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,4,128,0,1,fp8,fp8,0,0.3233013351758321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,float16,0,0.37830400466918945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,float16,0,0.22799466053644815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,128,0,1,fp8,fp8,0,0.3349813222885132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,16,8,128,0,1,float16,fp8,0,0.38606401284535724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,128,0,1,float16,fp8,0,0.22873065869013467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,16,128,0,1,fp8,fp8,0,0.2019946575164795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,float16,0,0.19132266441980997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,128,0,1,float16,fp8,0,0.19152534008026123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,1,128,0,1,fp8,fp8,0,0.17774399121602377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,float16,0,0.1940106749534607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,128,0,1,float16,fp8,0,0.19369600216547647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,2,128,0,1,fp8,fp8,0,0.1795146663983663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,float16,0,0.20122132698694864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,128,0,1,float16,fp8,0,0.19981332619984946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,4,128,0,1,fp8,fp8,0,0.18378132581710815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,float16,0,0.2116426626841227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,128,0,1,fp8,fp8,0,0.1911946733792623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,16,8,128,0,1,float16,fp8,0,0.2132693330446879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,float16,0,0.13286399841308594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,128,0,1,float16,fp8,0,0.13414399822553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,16,128,0,1,fp8,fp8,0,0.12196266651153564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,float16,0,0.11533866326014201
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,128,0,1,float16,fp8,0,0.11602133512496948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,1,128,0,1,fp8,fp8,0,0.10598400235176086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,float16,0,0.11607999602953593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,128,0,1,float16,fp8,0,0.11622933546702068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,2,128,0,1,fp8,fp8,0,0.10758933424949646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,float16,0,0.11749333143234253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,128,0,1,float16,fp8,0,0.11741333206494649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,4,128,0,1,fp8,fp8,0,0.10846400260925293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,float16,0,0.11983999609947205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,128,0,1,float16,fp8,0,0.11949867010116577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,16,8,128,0,1,fp8,fp8,0,0.1146399974822998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,float16,0,0.08393067121505737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,128,0,1,float16,fp8,0,0.08495466907819112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,16,128,0,1,fp8,fp8,0,0.07926933467388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,float16,0,0.08265600105126698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,128,0,1,float16,fp8,0,0.08169066905975342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,1,128,0,1,fp8,fp8,0,0.07665599882602692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,float16,0,0.08137066662311554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,128,0,1,float16,fp8,0,0.08195200065771739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,2,128,0,1,fp8,fp8,0,0.07701866825421651
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,float16,0,0.08145066599051158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,128,0,1,float16,fp8,0,0.08302933474381764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,4,128,0,1,fp8,fp8,0,0.0766186664501826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,float16,0,0.08269333342711131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,128,0,1,float16,fp8,0,0.08299200236797333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,16,8,128,0,1,fp8,fp8,0,0.07795199751853943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,float16,0,1.424741268157959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,128,0,1,fp8,fp8,0,1.2672266960144043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,1,128,0,1,float16,fp8,0,1.4226346015930176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,128,0,1,fp8,fp8,0,1.2866559823354085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,float16,0,1.444976011912028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,2,128,0,1,float16,fp8,0,1.4581386248270671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,float16,0,1.484015941619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,128,0,1,fp8,fp8,0,1.3127040068308513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,4,128,0,1,float16,fp8,0,1.4843360582987468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,float16,0,1.5704372723897297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,float16,0,0.8354612986246744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,128,0,1,float16,fp8,0,0.8515199820200602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,128,0,1,float16,fp8,0,1.5842453638712566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,16,8,128,0,1,fp8,fp8,0,1.3872159322102864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,16,128,0,1,fp8,fp8,0,0.7490453720092773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,float16,0,0.7206186453501383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,128,0,1,fp8,fp8,0,0.6458880106608073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,1,128,0,1,float16,fp8,0,0.7272960344950358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,float16,0,0.7270346482594808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,128,0,1,float16,fp8,0,0.7351253032684326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,2,128,0,1,fp8,fp8,0,0.6543840169906616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,float16,0,0.7506773471832275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,128,0,1,float16,fp8,0,0.752026637395223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,4,128,0,1,fp8,fp8,0,0.6666719913482666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,float16,0,0.7916959921518961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,128,0,1,float16,fp8,0,0.803663969039917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,float16,0,0.4371253252029419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,16,8,128,0,1,fp8,fp8,0,0.7024959723154703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,128,0,1,float16,fp8,0,0.4456319808959961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,16,128,0,1,fp8,fp8,0,0.3915040095647176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,float16,0,0.37768534819285077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,128,0,1,float16,fp8,0,0.37593599160512287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,1,128,0,1,fp8,fp8,0,0.3415040175120036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,float16,0,0.3822186787923177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,128,0,1,float16,fp8,0,0.38318932056427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,2,128,0,1,fp8,fp8,0,0.3449813524881999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,float16,0,0.3913653294245402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,128,0,1,float16,fp8,0,0.39552533626556396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,4,128,0,1,fp8,fp8,0,0.3511253197987874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,float16,0,0.4134346644083659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,128,0,1,float16,fp8,0,0.41830400625864667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,16,8,128,0,1,fp8,fp8,0,0.3683040142059326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,float16,0,0.2392746607462565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,128,0,1,float16,fp8,0,0.24277333418528238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,16,128,0,1,fp8,fp8,0,0.21520533164342245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,float16,0,0.20245333512624106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,128,0,1,float16,fp8,0,0.2026133338610331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,1,128,0,1,fp8,fp8,0,0.18849066893259683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,float16,0,0.20524799823760986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,128,0,1,float16,fp8,0,0.20834134022394815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,2,128,0,1,fp8,fp8,0,0.19061332941055298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,float16,0,0.2120479941368103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,128,0,1,fp8,fp8,0,0.1944213310877482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,4,128,0,1,float16,fp8,0,0.21449067195256552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,float16,0,0.22484266757965088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,128,0,1,float16,fp8,0,0.22802132368087769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,float16,0,0.13801067074139914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,16,8,128,0,1,fp8,fp8,0,0.20325332880020142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,128,0,1,float16,fp8,0,0.1397546629110972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,16,128,0,1,fp8,fp8,0,0.12601066629091898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,float16,0,0.11314666271209717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,128,0,1,float16,fp8,0,0.11435733238855998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,1,128,0,1,fp8,fp8,0,0.10413866241772969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,float16,0,0.11358400185902913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,128,0,1,float16,fp8,0,0.11569600303967793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,2,128,0,1,fp8,fp8,0,0.1074026624361674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,float16,0,0.1179146667321523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,128,0,1,float16,fp8,0,0.11682666341463725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,4,128,0,1,fp8,fp8,0,0.11100799838701884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,float16,0,0.12611732880274454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,128,0,1,float16,fp8,0,0.12603200475374857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,float16,0,0.07980266710122426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,16,8,128,0,1,fp8,fp8,0,0.11819199721018474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,128,0,1,float16,fp8,0,0.08027199904123943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,16,128,0,1,fp8,fp8,0,0.07863999903202057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,float16,0,0.07493333518505096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,128,0,1,float16,fp8,0,0.07550933460394542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,1,128,0,1,fp8,fp8,0,0.06891733407974243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,float16,0,0.07684266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,128,0,1,float16,fp8,0,0.07671999931335449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,2,128,0,1,fp8,fp8,0,0.07005866865317027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,float16,0,0.07691200077533722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,128,0,1,float16,fp8,0,0.0772213339805603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,4,128,0,1,fp8,fp8,0,0.07057600220044453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,float16,0,0.07650133470694225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,128,0,1,float16,fp8,0,0.07861333092053731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,16,8,128,0,1,fp8,fp8,0,0.07274133463700612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,float16,0,0.053898667295773826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,128,0,1,float16,fp8,0,0.0561653325955073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,float16,0,0.05197333296140035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,16,128,0,1,fp8,fp8,0,0.053690666953722634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,128,0,1,float16,fp8,0,0.052282666166623436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,1,128,0,1,fp8,fp8,0,0.049829334020614624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,float16,0,0.05356266597906748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,128,0,1,float16,fp8,0,0.052186667919158936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,2,128,0,1,fp8,fp8,0,0.050554667909940086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,float16,0,0.053930665055910744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,128,0,1,float16,fp8,0,0.05414933462937673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,4,128,0,1,fp8,fp8,0,0.05190399785836538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,float16,0,0.05398400127887726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,128,0,1,float16,fp8,0,0.05406933526198069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,16,8,128,0,1,fp8,fp8,0,0.05171733101209005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,float16,0,0.9690133730570475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,128,0,1,fp8,fp8,0,0.8698346614837646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,1,128,0,1,float16,fp8,0,0.9761760234832764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,float16,0,0.9859946568806967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,128,0,1,fp8,fp8,0,0.8846453030904134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,2,128,0,1,float16,fp8,0,0.9883946577707926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,float16,0,1.0069119930267334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,128,0,1,fp8,fp8,0,0.901642640431722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,4,128,0,1,float16,fp8,0,1.015775998433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,float16,0,1.0795413653055828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,float16,0,0.5783199866612753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,128,0,1,fp8,fp8,0,0.9503466288248698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,128,0,1,float16,fp8,0,0.5894026756286621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,16,8,128,0,1,float16,fp8,0,1.0802079836527507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,16,128,0,1,fp8,fp8,0,0.5255786577860514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,128,0,1,fp8,fp8,0,0.4474986791610718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,float16,0,0.49181334177652997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,1,128,0,1,float16,fp8,0,0.5002826849619547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,float16,0,0.5042453209559122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,128,0,1,fp8,fp8,0,0.45374401410420734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,2,128,0,1,float16,fp8,0,0.5052640040715536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,float16,0,0.5165599981943766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,128,0,1,float16,fp8,0,0.5199999809265137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,4,128,0,1,fp8,fp8,0,0.464789350827535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,float16,0,0.539738655090332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,128,0,1,float16,fp8,0,0.5497013330459595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,float16,0,0.3063039978345235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,16,8,128,0,1,fp8,fp8,0,0.48706666628519696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,128,0,1,float16,fp8,0,0.3114560047785441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,16,128,0,1,fp8,fp8,0,0.2797066569328308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,float16,0,0.260858674844106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,128,0,1,float16,fp8,0,0.2610666751861572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,1,128,0,1,fp8,fp8,0,0.24001065889994302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,float16,0,0.26523733139038086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,128,0,1,float16,fp8,0,0.2682560086250305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,2,128,0,1,fp8,fp8,0,0.2425653338432312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,float16,0,0.2739466627438863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,128,0,1,float16,fp8,0,0.27589333057403564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,4,128,0,1,fp8,fp8,0,0.2483946681022644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,float16,0,0.28592532873153687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,128,0,1,float16,fp8,0,0.2906240026156108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,16,8,128,0,1,fp8,fp8,0,0.2584693431854248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,float16,0,0.16894400119781494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,128,0,1,fp8,fp8,0,0.15656532843907675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,16,128,0,1,float16,fp8,0,0.17318934202194214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,float16,0,0.14109866817792258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,128,0,1,float16,fp8,0,0.13926933209101358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,1,128,0,1,fp8,fp8,0,0.132341335217158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,float16,0,0.14223999778429666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,128,0,1,float16,fp8,0,0.14470932881037393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,2,128,0,1,fp8,fp8,0,0.13573333621025085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,float16,0,0.14815466602643332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,128,0,1,float16,fp8,0,0.15044266978899637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,4,128,0,1,fp8,fp8,0,0.13949867089589438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,float16,0,0.15809067090352377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,float16,0,0.09715732932090759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,128,0,1,float16,fp8,0,0.159061332543691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,16,8,128,0,1,fp8,fp8,0,0.14429333806037903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,128,0,1,float16,fp8,0,0.0993280013402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,16,128,0,1,fp8,fp8,0,0.09245333075523376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,float16,0,0.082997332016627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,128,0,1,float16,fp8,0,0.08384000261624654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,1,128,0,1,fp8,fp8,0,0.07692799965540568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,float16,0,0.08458667000134786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,128,0,1,float16,fp8,0,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,2,128,0,1,fp8,fp8,0,0.07787199815114339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,float16,0,0.08497599760691325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,128,0,1,float16,fp8,0,0.08508800466855367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,4,128,0,1,fp8,fp8,0,0.07853333155314128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,float16,0,0.08772800366083781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,128,0,1,float16,fp8,0,0.0885813335577647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,16,8,128,0,1,fp8,fp8,0,0.08473599950472514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,float16,0,0.060229331254959106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,128,0,1,float16,fp8,0,0.062090665102005005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,16,128,0,1,fp8,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,float16,0,0.05824000140031179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,128,0,1,float16,fp8,0,0.05836800237496694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,1,128,0,1,fp8,fp8,0,0.05378133555253347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,float16,0,0.057962665955225624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,128,0,1,float16,fp8,0,0.05825066566467285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,2,128,0,1,fp8,fp8,0,0.054416000843048096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,float16,0,0.058229332168896995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,128,0,1,float16,fp8,0,0.060271998246510826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,4,128,0,1,fp8,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,float16,0,0.06003733476003011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,128,0,1,float16,fp8,0,0.060090666015942894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,16,8,128,0,1,fp8,fp8,0,0.054325332244237266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,float16,0,0.044213334719340004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,128,0,1,float16,fp8,0,0.04478399952252706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,16,128,0,1,fp8,fp8,0,0.042223999897638954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,float16,0,0.043552001317342125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,128,0,1,float16,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,1,128,0,1,fp8,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,float16,0,0.0436106671889623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,128,0,1,float16,fp8,0,0.04557866851488749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,2,128,0,1,fp8,fp8,0,0.04165866722663244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,float16,0,0.04366933306058248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,128,0,1,float16,fp8,0,0.04372266431649526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,4,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,float16,0,0.044293334086736046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,128,0,1,float16,fp8,0,0.04412800073623657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,16,8,128,0,1,fp8,fp8,0,0.0420959989229838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,float16,0,1.0399306615193684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,128,0,1,fp8,fp8,0,0.9920960267384847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,1,128,0,1,float16,fp8,0,1.0346559683481853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,128,0,1,fp8,fp8,0,1.0032213528951008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,float16,0,1.0615306695302327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,2,128,0,1,float16,fp8,0,1.0505653222401936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,float16,0,1.1207306385040283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,float16,0,1.1309119860331218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,128,0,1,float16,fp8,0,1.098842700322469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,4,128,0,1,fp8,fp8,0,1.2038133144378662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,float16,0,0.6253653367360433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,128,0,1,float16,fp8,0,0.6175839900970459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,128,0,1,float16,fp8,0,1.1142346858978271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,16,8,128,0,1,fp8,fp8,0,1.1667733192443848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,float16,0,0.5302613178888956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,16,128,0,1,fp8,fp8,0,0.6000800132751465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,128,0,1,float16,fp8,0,0.5283893346786499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,1,128,0,1,fp8,fp8,0,0.5078239838282267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,float16,0,0.5421066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,128,0,1,float16,fp8,0,0.5389546553293864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,2,128,0,1,fp8,fp8,0,0.5116480191548666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,float16,0,0.5646506547927856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,128,0,1,float16,fp8,0,0.5615040063858032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,4,128,0,1,fp8,fp8,0,0.5980106592178345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,float16,0,0.5799520015716553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,128,0,1,float16,fp8,0,0.5683893362681071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,float16,0,0.32550932963689166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,16,8,128,0,1,fp8,fp8,0,0.5700960159301758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,128,0,1,float16,fp8,0,0.3208906650543213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,16,128,0,1,fp8,fp8,0,0.3122239907582601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,float16,0,0.2777013381322225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,128,0,1,float16,fp8,0,0.27794132630030316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,1,128,0,1,fp8,fp8,0,0.26317866643269855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,float16,0,0.2842186689376831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,128,0,1,float16,fp8,0,0.28381866216659546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,2,128,0,1,fp8,fp8,0,0.26497600475947064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,float16,0,0.2978399991989136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,128,0,1,float16,fp8,0,0.29454400142033893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,4,128,0,1,fp8,fp8,0,0.2935306628545125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,float16,0,0.30373867352803546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,128,0,1,float16,fp8,0,0.29940799872080487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,float16,0,0.17824000120162964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,16,8,128,0,1,fp8,fp8,0,0.2877653241157532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,128,0,1,float16,fp8,0,0.1743946671485901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,16,128,0,1,fp8,fp8,0,0.16749332348505655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,float16,0,0.14993600050608316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,128,0,1,float16,fp8,0,0.14851199587186178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,1,128,0,1,fp8,fp8,0,0.1413653294245402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,float16,0,0.1525813341140747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,128,0,1,float16,fp8,0,0.1536960005760193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,2,128,0,1,fp8,fp8,0,0.14241066575050354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,float16,0,0.1607039968172709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,128,0,1,float16,fp8,0,0.1590079963207245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,4,128,0,1,fp8,fp8,0,0.15665066242218018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,float16,0,0.16456000010172525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,128,0,1,float16,fp8,0,0.1625706652800242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,16,8,128,0,1,fp8,fp8,0,0.15646933515866598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,fp8,0,0.10012267033259074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,128,0,1,float16,float16,0,0.10157333811124165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,16,128,0,1,fp8,fp8,0,0.09693866968154907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,float16,0,0.08310399949550629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,128,0,1,float16,fp8,0,0.08216533561547597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,1,128,0,1,fp8,fp8,0,0.07904000083605449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,float16,0,0.08514666557312012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,128,0,1,float16,fp8,0,0.08286933104197185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,2,128,0,1,fp8,fp8,0,0.07830933233102162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,float16,0,0.08839466174443562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,128,0,1,float16,fp8,0,0.0872320036093394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,4,128,0,1,fp8,fp8,0,0.0853653351465861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,float16,0,0.09273599584897359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,128,0,1,float16,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,float16,0,0.05481066803137461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,16,8,128,0,1,fp8,fp8,0,0.08841066559155782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,128,0,1,float16,fp8,0,0.05428266525268555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,16,128,0,1,fp8,fp8,0,0.05421866476535797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,float16,0,0.05221333106358846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,128,0,1,float16,fp8,0,0.051311999559402466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,1,128,0,1,fp8,fp8,0,0.04814399778842926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,float16,0,0.05362666646639506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,128,0,1,float16,fp8,0,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,2,128,0,1,fp8,fp8,0,0.048309331138928734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,float16,0,0.053120002150535583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,128,0,1,float16,fp8,0,0.05421333511670431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,4,128,0,1,fp8,fp8,0,0.05163733164469401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,float16,0,0.05226666728655497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,128,0,1,float16,fp8,0,0.05390933156013489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,16,8,128,0,1,fp8,fp8,0,0.053077335158983864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,128,0,1,float16,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,16,128,0,1,fp8,fp8,0,0.035717333356539406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,float16,0,0.03344533344109853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,128,0,1,fp8,fp8,0,0.033215999603271484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,1,128,0,1,float16,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,float16,0,0.035504000882307686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,128,0,1,float16,fp8,0,0.03403199960788091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,2,128,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,float16,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,128,0,1,float16,fp8,0,0.035589332381884255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,4,128,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,float16,0,0.03550933301448822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,128,0,1,float16,fp8,0,0.035631999373435974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,16,8,128,0,1,fp8,fp8,0,0.03549333413441976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,float16,0,0.031199999153614044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,128,0,1,float16,fp8,0,0.03146666785081228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,16,128,0,1,fp8,fp8,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,float16,0,0.029525332152843475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,128,0,1,float16,fp8,0,0.02957333376010259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,1,128,0,1,fp8,fp8,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,2,128,0,1,fp8,fp8,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,float16,0,0.029872000217437744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,128,0,1,float16,fp8,0,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,4,128,0,1,fp8,fp8,0,0.02769600103298823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,float16,0,0.02958400050799052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,128,0,1,float16,fp8,0,0.029663999875386555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,16,8,128,0,1,fp8,fp8,0,0.02975466599067052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,128,0,1,float16,fp8,0,0.8962133725484213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,128,0,1,float16,float16,0,0.9055893421173096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,1,128,0,1,fp8,fp8,0,0.8722240130106608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,128,0,1,float16,float16,0,0.919813315073649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,128,0,1,float16,fp8,0,0.9141226609547933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,2,128,0,1,fp8,fp8,0,0.8791786829630533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,128,0,1,float16,float16,0,0.9825387001037598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,128,0,1,float16,fp8,0,0.9616106351216634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,4,128,0,1,fp8,fp8,0,1.0741759936014812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,128,0,1,float16,float16,0,0.9805440107981364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,128,0,1,float16,float16,0,0.5471733411153158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,128,0,1,float16,fp8,0,0.5390133460362753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,128,0,1,float16,fp8,0,0.9618186950683594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,16,8,128,0,1,fp8,fp8,0,1.0361226399739583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,128,0,1,float16,float16,0,0.463589350382487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,16,128,0,1,fp8,fp8,0,0.5364266633987427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,128,0,1,fp8,fp8,0,0.44514667987823486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,1,128,0,1,float16,fp8,0,0.4614986578623454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,128,0,1,float16,float16,0,0.47168533007303876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,128,0,1,float16,fp8,0,0.4688906669616699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,2,128,0,1,fp8,fp8,0,0.4476426839828491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,128,0,1,float16,float16,0,0.49235733350118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,128,0,1,float16,fp8,0,0.48662400245666504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,4,128,0,1,fp8,fp8,0,0.5222346782684326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,128,0,1,float16,float16,0,0.5028426647186279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,128,0,1,float16,fp8,0,0.4933546781539917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,128,0,1,float16,float16,0,0.2851039965947469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,16,8,128,0,1,fp8,fp8,0,0.5039466619491577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,128,0,1,float16,fp8,0,0.28142400582631427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,16,128,0,1,fp8,fp8,0,0.27853333950042725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,128,0,1,float16,float16,0,0.24251733223597208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,128,0,1,float16,fp8,0,0.23932800690333048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,1,128,0,1,fp8,fp8,0,0.2304426630338033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,128,0,1,float16,float16,0,0.24679466088612875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,128,0,1,float16,fp8,0,0.24473067124684653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,2,128,0,1,fp8,fp8,0,0.23262399435043335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,128,0,1,float16,float16,0,0.25684799750645954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,128,0,1,float16,fp8,0,0.25440533955891925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,4,128,0,1,fp8,fp8,0,0.25489066044489544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,128,0,1,float16,float16,0,0.2632480065027873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,128,0,1,float16,fp8,0,0.2598453362782796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,16,8,128,0,1,fp8,fp8,0,0.2566240032513936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,128,0,1,float16,float16,0,0.1549493372440338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,128,0,1,float16,fp8,0,0.15286399920781454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,16,128,0,1,fp8,fp8,0,0.1508799990018209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,128,0,1,float16,float16,0,0.13024000326792398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,128,0,1,float16,fp8,0,0.13016000390052795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,1,128,0,1,fp8,fp8,0,0.12387733658154805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,128,0,1,float16,float16,0,0.13362133502960205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,128,0,1,float16,fp8,0,0.13421866297721863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,2,128,0,1,fp8,fp8,0,0.12588799993197122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,128,0,1,float16,float16,0,0.14038933316866556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,128,0,1,float16,fp8,0,0.1381493310133616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,4,128,0,1,fp8,fp8,0,0.1357973317305247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,128,0,1,float16,float16,0,0.1442293326059977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,128,0,1,float16,fp8,0,0.14096533258756003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,16,8,128,0,1,fp8,fp8,0,0.13750933607419333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,128,0,1,float16,float16,0,0.08808533350626628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,128,0,1,float16,fp8,0,0.08703466256459554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,16,128,0,1,fp8,fp8,0,0.08545600374539693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,128,0,1,float16,float16,0,0.07248533268769582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,128,0,1,float16,fp8,0,0.07231466472148895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,1,128,0,1,fp8,fp8,0,0.06866133213043213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,128,0,1,float16,float16,0,0.07351999978224437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,128,0,1,float16,fp8,0,0.07364266614119212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,2,128,0,1,fp8,fp8,0,0.0682239979505539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,128,0,1,float16,float16,0,0.07640000184377034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,128,0,1,float16,fp8,0,0.0762613316377004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,4,128,0,1,fp8,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,128,0,1,float16,float16,0,0.07879999776681264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,128,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,128,0,1,float16,fp8,0,0.07830399771531422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,16,8,128,0,1,fp8,fp8,0,0.07832000156243642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,128,0,1,float16,fp8,0,0.04762133459250132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,16,128,0,1,fp8,fp8,0,0.04861866434415182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,128,0,1,float16,float16,0,0.04411733150482178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,128,0,1,float16,fp8,0,0.04497066636880239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,1,128,0,1,fp8,fp8,0,0.04279999931653341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,128,0,1,float16,float16,0,0.04452266792456309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,128,0,1,float16,fp8,0,0.04576000074545542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,128,0,1,float16,float16,0,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,2,128,0,1,fp8,fp8,0,0.04274133344491323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,128,0,1,float16,fp8,0,0.04663999875386556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,4,128,0,1,fp8,fp8,0,0.04595200220743815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,128,0,1,float16,float16,0,0.04764799773693085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,128,0,1,float16,fp8,0,0.04646400113900503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,128,0,1,float16,float16,0,0.031914666295051575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,16,8,128,0,1,fp8,fp8,0,0.046122665206591286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,128,0,1,float16,fp8,0,0.031125334401925404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,128,0,1,float16,float16,0,0.029802667597929638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,16,128,0,1,fp8,fp8,0,0.03142933299144109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,128,0,1,float16,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,1,128,0,1,fp8,fp8,0,0.028069332242012024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,128,0,1,float16,float16,0,0.02962133288383484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,128,0,1,float16,fp8,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,2,128,0,1,fp8,fp8,0,0.029050665597120922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,128,0,1,float16,float16,0,0.02993600070476532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,128,0,1,float16,fp8,0,0.02974933385848999
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,4,128,0,1,fp8,fp8,0,0.029472000896930695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,128,0,1,float16,float16,0,0.0305173322558403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,128,0,1,float16,fp8,0,0.0315733328461647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,16,8,128,0,1,fp8,fp8,0,0.029637334247430164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,128,0,1,float16,float16,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,16,128,0,1,fp8,fp8,0,0.025199999411900837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,128,0,1,float16,float16,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,128,0,1,float16,fp8,0,0.023418667415777843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,1,128,0,1,fp8,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,128,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,128,0,1,float16,fp8,0,0.024527999262015026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,2,128,0,1,fp8,fp8,0,0.02364266663789749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,128,0,1,float16,float16,0,0.02571200082699458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,128,0,1,float16,fp8,0,0.025248001019159954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,4,128,0,1,fp8,fp8,0,0.023621333142121632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,128,0,1,float16,float16,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,128,0,1,float16,fp8,0,0.02550933261712392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,16,8,128,0,1,fp8,fp8,0,0.025439999997615814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,128,0,1,float16,float16,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,128,0,1,float16,fp8,0,0.02367999901374181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,16,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,128,0,1,float16,float16,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,128,0,1,float16,float16,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,1,128,0,1,fp8,fp8,0,0.02179199953873952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,2,128,0,1,fp8,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,128,0,1,float16,float16,0,0.021375998854637146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,128,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,4,128,0,1,float16,fp8,0,0.02181866765022278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,128,0,1,float16,float16,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,128,0,1,float16,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,16,8,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,128,0,1,float16,fp8,0,0.4140266577402751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,128,0,1,float16,float16,0,0.4209119876225789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,1,128,0,1,fp8,fp8,0,0.40930668512980145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,128,0,1,float16,float16,0,0.4338293472925822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,128,0,1,float16,fp8,0,0.4269546667734782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,128,0,1,float16,float16,0,0.45473066965738934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,2,128,0,1,fp8,fp8,0,0.4140160083770752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,128,0,1,float16,fp8,0,0.44761065642038983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,4,128,0,1,fp8,fp8,0,0.4893653392791748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,128,0,1,float16,fp8,0,0.45235200723012287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,128,0,1,float16,float16,0,0.4633760054906209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,128,0,1,float16,float16,0,0.2654986580212911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,128,0,1,float16,fp8,0,0.26107199986775714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,16,8,128,0,1,fp8,fp8,0,0.4570186535517375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,128,0,1,float16,float16,0,0.22076799472173056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,16,128,0,1,fp8,fp8,0,0.26395199696222943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,128,0,1,float16,fp8,0,0.21897067626317343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,1,128,0,1,fp8,fp8,0,0.21280533075332642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,128,0,1,float16,float16,0,0.22847465674082437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,128,0,1,float16,fp8,0,0.2243786652882894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,2,128,0,1,fp8,fp8,0,0.21594667434692383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,128,0,1,float16,float16,0,0.23883734146753946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,128,0,1,float16,fp8,0,0.23454932371775308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,4,128,0,1,fp8,fp8,0,0.23418132464090982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,128,0,1,float16,float16,0,0.24315200249354044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,128,0,1,float16,fp8,0,0.237829327583313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,128,0,1,float16,float16,0,0.14275733629862467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,16,8,128,0,1,fp8,fp8,0,0.2390026648839315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,128,0,1,float16,fp8,0,0.14134933551152548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,16,128,0,1,fp8,fp8,0,0.14268799622853598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,128,0,1,float16,float16,0,0.11789333820343018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,128,0,1,float16,fp8,0,0.1163040002187093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,1,128,0,1,fp8,fp8,0,0.11348266402880351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,128,0,1,float16,float16,0,0.12159466743469238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,128,0,1,float16,fp8,0,0.11957333485285442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,2,128,0,1,fp8,fp8,0,0.11563199758529663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,128,0,1,float16,float16,0,0.1279253363609314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,128,0,1,float16,fp8,0,0.12666133046150208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,4,128,0,1,fp8,fp8,0,0.12549333771069845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,128,0,1,float16,float16,0,0.13251733779907227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,128,0,1,float16,float16,0,0.08452266454696655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,128,0,1,fp8,fp8,0,0.12827199697494507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,16,8,128,0,1,float16,fp8,0,0.12920000155766806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,128,0,1,float16,fp8,0,0.0823520024617513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,128,0,1,float16,float16,0,0.06654400130112965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,16,128,0,1,fp8,fp8,0,0.08101866642634074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,128,0,1,float16,fp8,0,0.0664106657107671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,1,128,0,1,fp8,fp8,0,0.06228800117969513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,128,0,1,float16,float16,0,0.0684799998998642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,128,0,1,float16,fp8,0,0.06685333450635274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,2,128,0,1,fp8,fp8,0,0.06390933195749919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,128,0,1,float16,float16,0,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,128,0,1,float16,fp8,0,0.07030933101971944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,4,128,0,1,fp8,fp8,0,0.07152000069618225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,128,0,1,float16,float16,0,0.07406400144100189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,128,0,1,float16,fp8,0,0.0724533349275589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,128,0,1,float16,float16,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,16,8,128,0,1,fp8,fp8,0,0.07457600037256877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,128,0,1,float16,fp8,0,0.04368533194065094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,128,0,1,float16,float16,0,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,128,0,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,1,128,0,1,fp8,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,16,128,0,1,fp8,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,128,0,1,float16,float16,0,0.039450667798519135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,128,0,1,float16,fp8,0,0.0396373321612676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,2,128,0,1,fp8,fp8,0,0.037717332442601524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,128,0,1,float16,float16,0,0.042949333786964417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,4,128,0,1,fp8,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,128,0,1,float16,float16,0,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,128,0,1,fp8,fp8,0,0.04041599979003271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,128,0,1,float16,float16,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,128,0,1,float16,fp8,0,0.029120000700155895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,16,8,128,0,1,float16,fp8,0,0.04204266766707102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,16,128,0,1,fp8,fp8,0,0.029370665550231934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,128,0,1,float16,float16,0,0.027450665831565857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,1,128,0,1,fp8,fp8,0,0.027130665878454845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,128,0,1,float16,float16,0,0.027717334528764088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,128,0,1,float16,fp8,0,0.02794133375088374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,2,128,0,1,fp8,fp8,0,0.025968000292778015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,128,0,1,float16,float16,0,0.02794666588306427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,128,0,1,fp8,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,4,128,0,1,float16,fp8,0,0.027503999571005504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,128,0,1,float16,float16,0,0.029264000554879505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,128,0,1,fp8,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,128,0,1,float16,float16,0,0.023189333577950794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,16,8,128,0,1,float16,fp8,0,0.029626667499542236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,128,0,1,float16,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,16,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,128,0,1,float16,float16,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,1,128,0,1,float16,fp8,0,0.02370133250951767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,128,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,128,0,1,float16,fp8,0,0.0230880007147789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,2,128,0,1,fp8,fp8,0,0.02143999934196472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,128,0,1,float16,float16,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,128,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,4,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,128,0,1,float16,float16,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,128,0,1,fp8,fp8,0,0.023306667804718018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,16,8,128,0,1,float16,fp8,0,0.02425066630045573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,16,128,0,1,fp8,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,128,0,1,float16,fp8,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,1,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,128,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,128,0,1,float16,fp8,0,0.019546666493018467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,2,128,0,1,fp8,fp8,0,0.019541333119074505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,128,0,1,float16,float16,0,0.02090666691462199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,4,128,0,1,fp8,fp8,0,0.01979200045267741
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,128,0,1,float16,float16,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,128,0,1,float16,fp8,0,0.021690666675567627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,16,8,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,128,0,1,float16,float16,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,16,128,0,1,float16,fp8,0,0.019461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,128,0,1,float16,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,1,128,0,1,fp8,fp8,0,0.017610666652520496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,128,0,1,float16,float16,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,2,128,0,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,128,0,1,float16,fp8,0,0.019120000302791595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,4,128,0,1,fp8,fp8,0,0.017653333644072216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,128,0,1,float16,float16,0,0.01889066646496455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,16,8,128,0,1,fp8,fp8,0,0.019354666272799175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,128,0,1,float16,float16,0,0.23073599735895792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,128,0,1,float16,fp8,0,0.23020267486572266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,1,128,0,1,fp8,fp8,0,0.2305226723353068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,128,0,1,float16,float16,0,0.2375146746635437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,128,0,1,float16,fp8,0,0.2311413288116455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,2,128,0,1,fp8,fp8,0,0.23203200101852417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,128,0,1,float16,float16,0,0.25197867552439374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,128,0,1,float16,fp8,0,0.24595733483632407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,4,128,0,1,fp8,fp8,0,0.2547840078671773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,128,0,1,float16,float16,0,0.2550826668739319
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,128,0,1,float16,fp8,0,0.24729067087173462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,16,8,128,0,1,fp8,fp8,0,0.24928534030914307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,128,0,1,float16,float16,0,0.14628266294797262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,128,0,1,float16,fp8,0,0.14446933070818582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,16,128,0,1,fp8,fp8,0,0.14662933349609375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,128,0,1,float16,float16,0,0.12327999869982402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,128,0,1,float16,fp8,0,0.12380266189575195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,1,128,0,1,fp8,fp8,0,0.12132799625396729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,128,0,1,float16,float16,0,0.1269813378651937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,128,0,1,float16,fp8,0,0.12414933244387309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,2,128,0,1,fp8,fp8,0,0.12365866700808208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,128,0,1,float16,float16,0,0.13550399740537009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,128,0,1,float16,fp8,0,0.1320480008920034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,4,128,0,1,fp8,fp8,0,0.1362613340218862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,128,0,1,float16,float16,0,0.13758933544158936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,128,0,1,float16,fp8,0,0.1320853332678477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,128,0,1,float16,float16,0,0.08277866741021474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,16,8,128,0,1,fp8,fp8,0,0.1351040005683899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,128,0,1,float16,fp8,0,0.08207466701666515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,16,128,0,1,fp8,fp8,0,0.08293333152929942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,128,0,1,float16,float16,0,0.06850666801134746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,128,0,1,float16,fp8,0,0.06829866766929626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,1,128,0,1,fp8,fp8,0,0.06632000207901001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,128,0,1,float16,float16,0,0.07001600166161855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,128,0,1,fp8,fp8,0,0.06658666829268138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,2,128,0,1,float16,fp8,0,0.06859733164310455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,128,0,1,float16,float16,0,0.07287999987602234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,128,0,1,float16,fp8,0,0.07273066540559132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,4,128,0,1,fp8,fp8,0,0.0735093355178833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,128,0,1,float16,float16,0,0.07658666869004567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,128,0,1,float16,fp8,0,0.07445333401362102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,128,0,1,float16,float16,0,0.04565866788228353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,16,8,128,0,1,fp8,fp8,0,0.0758240024248759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,128,0,1,float16,fp8,0,0.044293334086736046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,16,128,0,1,fp8,fp8,0,0.04714666803677877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,128,0,1,float16,float16,0,0.04227200150489807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,128,0,1,float16,fp8,0,0.041840001940727234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,128,0,1,float16,float16,0,0.04167999823888143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,1,128,0,1,fp8,fp8,0,0.04037333279848099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,128,0,1,float16,fp8,0,0.041663999358812966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,2,128,0,1,fp8,fp8,0,0.04201599955558777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,128,0,1,float16,float16,0,0.04391466577847799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,128,0,1,fp8,fp8,0,0.04191466669241587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,4,128,0,1,float16,fp8,0,0.04435733457406362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,128,0,1,float16,float16,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,128,0,1,float16,fp8,0,0.04367466767628988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,128,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,16,8,128,0,1,fp8,fp8,0,0.04387199878692627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,128,0,1,float16,fp8,0,0.029765332738558452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,16,128,0,1,fp8,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,128,0,1,float16,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,128,0,1,float16,float16,0,0.029893333713213604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,1,128,0,1,fp8,fp8,0,0.027136000494162243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,128,0,1,float16,float16,0,0.027376001079877216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,128,0,1,float16,fp8,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,128,0,1,float16,float16,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,2,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,128,0,1,float16,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,4,128,0,1,fp8,fp8,0,0.028207999964555103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,128,0,1,float16,float16,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,16,8,128,0,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,128,0,1,float16,float16,0,0.02093333254257838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,16,128,0,1,fp8,fp8,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,1,128,0,1,fp8,fp8,0,0.020960000654061634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,128,0,1,float16,float16,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,2,128,0,1,fp8,fp8,0,0.021104000508785248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,4,128,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,128,0,1,float16,float16,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,128,0,1,float16,fp8,0,0.021770666042963665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,16,8,128,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,128,0,1,float16,float16,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,16,128,0,1,fp8,fp8,0,0.01877333347996076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,128,0,1,float16,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,1,128,0,1,fp8,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,2,128,0,1,fp8,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,128,0,1,float16,fp8,0,0.018906666586796444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,4,128,0,1,fp8,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,16,8,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,128,0,1,float16,float16,0,0.017429333180189133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,16,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,128,0,1,float16,float16,0,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,1,128,0,1,fp8,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,2,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,128,0,1,float16,fp8,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,4,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,128,0,1,float16,float16,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,16,8,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,128,0,1,float16,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,16,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,128,0,1,float16,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,4,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,16,8,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,128,0,1,float16,float16,0,0.1553920010725657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,128,0,1,float16,fp8,0,0.15636266271273294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,1,128,0,1,fp8,fp8,0,0.1507146656513214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,128,0,1,float16,float16,0,0.15678399801254272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,128,0,1,fp8,fp8,0,0.15053332845369974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,2,128,0,1,float16,fp8,0,0.156442662080129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,128,0,1,float16,float16,0,0.16367999712626138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,128,0,1,float16,fp8,0,0.1630880037943522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,4,128,0,1,fp8,fp8,0,0.1639359990755717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,128,0,1,float16,float16,0,0.16555733482042947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,128,0,1,float16,fp8,0,0.1637440025806427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,16,8,128,0,1,fp8,fp8,0,0.16198399662971497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,128,0,1,float16,fp8,0,0.09665600458780925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,128,0,1,float16,float16,0,0.09834133585294087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,16,128,0,1,fp8,fp8,0,0.09853333234786987
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,128,0,1,float16,float16,0,0.08684266606966655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,128,0,1,float16,fp8,0,0.08701866865158081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,1,128,0,1,fp8,fp8,0,0.08084266881148021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,128,0,1,float16,float16,0,0.08717333277066548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,128,0,1,float16,fp8,0,0.08504533767700195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,2,128,0,1,fp8,fp8,0,0.08070399860541026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,128,0,1,float16,float16,0,0.09087999661763509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,128,0,1,float16,fp8,0,0.08929600318272908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,4,128,0,1,fp8,fp8,0,0.08900800347328186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,128,0,1,float16,fp8,0,0.09195199608802795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,128,0,1,fp8,fp8,0,0.0916426678498586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,128,0,1,float16,float16,0,0.05226133267084757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,16,8,128,0,1,float16,float16,0,0.09179199735323589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,128,0,1,float16,fp8,0,0.05190933247407278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,16,128,0,1,fp8,fp8,0,0.0540533314148585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,128,0,1,float16,float16,0,0.04978133241335551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,128,0,1,float16,fp8,0,0.05046933392683665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,1,128,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,128,0,1,float16,float16,0,0.04959466556708018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,128,0,1,float16,fp8,0,0.05022933085759481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,2,128,0,1,fp8,fp8,0,0.04637866715590159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,128,0,1,float16,float16,0,0.051818668842315674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,128,0,1,float16,fp8,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,4,128,0,1,fp8,fp8,0,0.04990933338801066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,128,0,1,float16,float16,0,0.052245333790779114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,128,0,1,float16,fp8,0,0.05179200073083242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,16,8,128,0,1,fp8,fp8,0,0.04861866434415182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,128,0,1,float16,float16,0,0.03403733422358831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,128,0,1,float16,fp8,0,0.03495999922355016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,16,128,0,1,fp8,fp8,0,0.033770665526390076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,128,0,1,float16,float16,0,0.033285332222779594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,128,0,1,float16,fp8,0,0.03324799984693527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,1,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,128,0,1,float16,float16,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,128,0,1,float16,fp8,0,0.033146666983763375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,2,128,0,1,fp8,fp8,0,0.0320266659061114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,128,0,1,float16,fp8,0,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,4,128,0,1,fp8,fp8,0,0.03350933392842611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,128,0,1,float16,float16,0,0.03577066709597906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,128,0,1,float16,fp8,0,0.033530667424201965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,16,8,128,0,1,fp8,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,128,0,1,float16,float16,0,0.02437866727511088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,128,0,1,float16,fp8,0,0.02362666775782903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,16,128,0,1,fp8,fp8,0,0.02332266668478648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,128,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,1,128,0,1,fp8,fp8,0,0.021253332495689392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,128,0,1,float16,float16,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,128,0,1,float16,fp8,0,0.0235359991590182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,2,128,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,128,0,1,float16,float16,0,0.02372266600529353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,128,0,1,float16,fp8,0,0.02369066576162974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,4,128,0,1,fp8,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,128,0,1,float16,float16,0,0.024842667082945507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,128,0,1,float16,fp8,0,0.023520000278949738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,16,8,128,0,1,fp8,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,128,0,1,float16,float16,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,128,0,1,float16,fp8,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,16,128,0,1,fp8,fp8,0,0.019007999449968338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,128,0,1,float16,fp8,0,0.017397332936525345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,1,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,2,128,0,1,fp8,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,128,0,1,float16,float16,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,4,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,128,0,1,float16,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,16,8,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,16,128,0,1,fp8,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,128,0,1,float16,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,2,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,128,0,1,float16,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,4,128,0,1,fp8,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,128,0,1,float16,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,16,8,128,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,128,0,1,float16,fp8,0,0.016330666840076447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,16,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,1,128,0,1,fp8,fp8,0,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,2,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,128,0,1,float16,float16,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,128,0,1,float16,fp8,0,0.016810666769742966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,4,128,0,1,fp8,fp8,0,0.01578666642308235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,128,0,1,float16,fp8,0,0.01598400001724561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,16,8,128,0,1,fp8,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,128,0,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,16,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,128,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,1,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,2,128,0,1,fp8,fp8,0,0.016117333124081295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,4,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,16,8,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,128,0,1,float16,float16,0,0.11799466609954834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,128,0,1,float16,fp8,0,0.11794666449228923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,1,128,0,1,fp8,fp8,0,0.11070932944615682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,128,0,1,float16,float16,0,0.11974933743476868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,128,0,1,fp8,fp8,0,0.1109279990196228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,128,0,1,float16,float16,0,0.12159466743469238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,2,128,0,1,float16,fp8,0,0.11715733011563619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,128,0,1,float16,fp8,0,0.12177600463231404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,4,128,0,1,fp8,fp8,0,0.11865599950154622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,128,0,1,float16,float16,0,0.12171199917793274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,128,0,1,float16,fp8,0,0.12166933218638103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,16,8,128,0,1,fp8,fp8,0,0.1200266679128011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,128,0,1,float16,float16,0,0.06877866884072621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,128,0,1,float16,fp8,0,0.06695466736952464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,16,128,0,1,fp8,fp8,0,0.06727466483910878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,128,0,1,float16,float16,0,0.06566399832566579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,128,0,1,float16,fp8,0,0.06459733347098033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,1,128,0,1,fp8,fp8,0,0.060602664947509766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,128,0,1,float16,float16,0,0.06665066878000896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,128,0,1,float16,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,2,128,0,1,fp8,fp8,0,0.061290666460990906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,128,0,1,float16,float16,0,0.06830400228500366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,128,0,1,float16,fp8,0,0.06785066425800323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,128,0,1,float16,float16,0,0.06843199829260509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,4,128,0,1,fp8,fp8,0,0.06453866759936015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,128,0,1,float16,fp8,0,0.06823466718196869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,128,0,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,16,8,128,0,1,fp8,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,128,0,1,float16,fp8,0,0.04193066557248434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,16,128,0,1,fp8,fp8,0,0.0397173340121905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,128,0,1,float16,float16,0,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,128,0,1,float16,fp8,0,0.03956266740957896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,1,128,0,1,fp8,fp8,0,0.03794133414824804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,128,0,1,float16,float16,0,0.039647998909155525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,128,0,1,float16,fp8,0,0.04077333211898804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,128,0,1,float16,float16,0,0.041162667175134025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,2,128,0,1,fp8,fp8,0,0.0388373335202535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,128,0,1,float16,fp8,0,0.041722665230433144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,4,128,0,1,fp8,fp8,0,0.040021332601706185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,128,0,1,float16,fp8,0,0.042117332418759666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,128,0,1,float16,float16,0,0.028255999088287354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,16,8,128,0,1,fp8,fp8,0,0.04148799926042557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,128,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,16,128,0,1,fp8,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,128,0,1,float16,float16,0,0.027600000301996868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,128,0,1,float16,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,1,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,128,0,1,float16,float16,0,0.02762666592995326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,128,0,1,float16,fp8,0,0.02743999908367793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,2,128,0,1,fp8,fp8,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,128,0,1,float16,float16,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,128,0,1,float16,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,4,128,0,1,fp8,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,128,0,1,float16,float16,0,0.02754666656255722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,128,0,1,float16,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,16,8,128,0,1,fp8,fp8,0,0.02752533306678136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,128,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,128,0,1,float16,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,16,128,0,1,fp8,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,128,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,128,0,1,float16,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,1,128,0,1,fp8,fp8,0,0.020074666788180668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,128,0,1,float16,float16,0,0.020010666300853092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,2,128,0,1,fp8,fp8,0,0.019359999646743137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,128,0,1,float16,float16,0,0.02067733307679494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,4,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,128,0,1,float16,float16,0,0.020874666670958202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,128,0,1,float16,fp8,0,0.019738666713237762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,16,8,128,0,1,fp8,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,16,128,0,1,float16,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,2,128,0,1,fp8,fp8,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,4,128,0,1,fp8,fp8,0,0.016565332810084026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,128,0,1,float16,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,16,8,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,128,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,128,0,1,float16,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,16,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,1,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,128,0,1,float16,fp8,0,0.017263999829689663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,128,0,1,float16,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,4,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,128,0,1,fp8,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,16,8,128,0,1,float16,float16,0,0.016122666498025257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,16,128,0,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,128,0,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,1,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,2,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,128,0,1,float16,float16,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,4,128,0,1,fp8,fp8,0,0.01589866727590561
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,128,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,16,8,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,16,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,128,0,1,float16,fp8,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,1,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,2,128,0,1,float16,fp8,0,0.01573333392540614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,4,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,128,0,1,float16,float16,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,16,8,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,128,0,1,float16,float16,0,0.09705600142478943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,128,0,1,float16,fp8,0,0.09728533029556274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,1,128,0,1,fp8,fp8,0,0.09345600008964539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,128,0,1,float16,float16,0,0.0990559955437978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,128,0,1,float16,fp8,0,0.09749333063761394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,2,128,0,1,fp8,fp8,0,0.0941493312517802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,128,0,1,float16,fp8,0,0.09864532947540283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,128,0,1,float16,float16,0,0.09969600041707356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,4,128,0,1,fp8,fp8,0,0.09525866309801738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,128,0,1,float16,float16,0,0.09900800387064616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,128,0,1,float16,fp8,0,0.09937066833178203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,16,8,128,0,1,fp8,fp8,0,0.09732799728711446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,128,0,1,float16,float16,0,0.05823466678460439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,128,0,1,float16,fp8,0,0.05859733124574026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,16,128,0,1,fp8,fp8,0,0.056277334690093994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,128,0,1,float16,float16,0,0.05750933289527893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,128,0,1,float16,fp8,0,0.05645333230495453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,1,128,0,1,fp8,fp8,0,0.05438933273156484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,128,0,1,float16,float16,0,0.05816533168156942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,128,0,1,float16,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,2,128,0,1,fp8,fp8,0,0.0545066644748052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,128,0,1,float16,float16,0,0.058506667613983154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,128,0,1,float16,fp8,0,0.058005332946777344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,4,128,0,1,fp8,fp8,0,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,128,0,1,float16,float16,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,128,0,1,float16,fp8,0,0.05779199798901876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,128,0,1,float16,float16,0,0.035562666753927864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,16,8,128,0,1,fp8,fp8,0,0.05639466643333435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,128,0,1,float16,fp8,0,0.035760000348091125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,16,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,128,0,1,float16,float16,0,0.03412266572316488
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,128,0,1,float16,fp8,0,0.035802667339642845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,1,128,0,1,fp8,fp8,0,0.03342399994532267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,128,0,1,float16,float16,0,0.03457599878311157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,128,0,1,float16,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,2,128,0,1,fp8,fp8,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,128,0,1,float16,float16,0,0.0358240008354187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,128,0,1,float16,fp8,0,0.035786665976047516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,4,128,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,128,0,1,float16,float16,0,0.03547733277082443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,128,0,1,float16,fp8,0,0.035616000493367515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,16,8,128,0,1,fp8,fp8,0,0.03551466763019562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,128,0,1,float16,float16,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,128,0,1,float16,fp8,0,0.02536533276240031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,16,128,0,1,fp8,fp8,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,128,0,1,float16,fp8,0,0.025072000920772552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,1,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,128,0,1,float16,float16,0,0.023765332996845245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,128,0,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,2,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,128,0,1,float16,float16,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,128,0,1,fp8,fp8,0,0.023397333920001984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,4,128,0,1,float16,float16,0,0.024720000723997753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,128,0,1,float16,fp8,0,0.02532800038655599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,16,8,128,0,1,fp8,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,128,0,1,float16,float16,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,128,0,1,float16,fp8,0,0.0198186660806338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,16,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,128,0,1,float16,float16,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,128,0,1,float16,float16,0,0.01887999971707662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,1,128,0,1,fp8,fp8,0,0.019776000330845516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,2,128,0,1,fp8,fp8,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,128,0,1,float16,float16,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,128,0,1,float16,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,128,0,1,float16,float16,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,4,128,0,1,fp8,fp8,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,128,0,1,fp8,fp8,0,0.018863999595244724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,16,8,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,128,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,16,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,128,0,1,float16,fp8,0,0.01691199963291486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,128,0,1,float16,float16,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,1,128,0,1,fp8,fp8,0,0.01764800027012825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,2,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,128,0,1,float16,float16,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,4,128,0,1,fp8,fp8,0,0.015615999698638916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,16,8,128,0,1,fp8,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,128,0,1,float16,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,16,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,128,0,1,float16,float16,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,1,128,0,1,float16,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,2,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,4,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,16,8,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,128,0,1,float16,float16,0,0.015935999651749928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,16,128,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,1,128,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,128,0,1,float16,float16,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,128,0,1,float16,fp8,0,0.016085332880417507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,2,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,4,128,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,128,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,16,8,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,16,128,0,1,fp8,fp8,0,0.016442666451136272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,128,0,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,1,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,2,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,128,0,1,float16,float16,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,4,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,16,8,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,0,0.08307733138402303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,0,0.08297599852085114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,1,128,0,1,fp8,fp8,0,0.07666666805744171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,0,0.0828000009059906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,0,0.08333866794904073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,2,128,0,1,fp8,fp8,0,0.07674666742483775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,0,0.08268266419569652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,0,0.08317866424719493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,4,128,0,1,fp8,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,0,0.08343999584515889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,0,0.08297599852085114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,16,8,128,0,1,fp8,fp8,0,0.07865599791208903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,0,0.04957866668701172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,0,0.048101335763931274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,16,128,0,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,0,0.048063998421033226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,0,0.04808533191680908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,1,128,0,1,fp8,fp8,0,0.045925334095954895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,0,0.04795200129350027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,0,0.049733335773150124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,2,128,0,1,fp8,fp8,0,0.046015997727712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,0,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,0,0.049839998284975685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,4,128,0,1,fp8,fp8,0,0.045663997530937195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,0,0.048122664292653404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,16,8,128,0,1,fp8,fp8,0,0.045941332976023354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,128,0,1,fp8,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,0,0.03158933420976003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,1,128,0,1,fp8,fp8,0,0.030447999636332195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,0,0.03173866619666418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,2,128,0,1,fp8,fp8,0,0.029285334050655365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,0,0.03260799994071325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,0,0.03330666571855545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,4,128,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,0,0.03153600047032038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,16,8,128,0,1,fp8,fp8,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,16,128,0,1,fp8,fp8,0,0.02310933421055476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,0,0.023391999304294586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,1,128,0,1,fp8,fp8,0,0.023039999107519787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,0,0.024879999458789825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,2,128,0,1,fp8,fp8,0,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,0,0.02515200028816859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,4,128,0,1,fp8,fp8,0,0.022202665607134502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,0,0.023647998770078022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,0,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,16,8,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,0,0.019130667050679524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,16,128,0,1,fp8,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,1,128,0,1,fp8,fp8,0,0.019498666127522785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,0,0.02111999938885371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,2,128,0,1,fp8,fp8,0,0.017535999417304993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,0,0.02252800017595291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,0,0.021749332547187805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,4,128,0,1,fp8,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,16,8,128,0,1,fp8,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,16,128,0,1,fp8,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,1,128,0,1,fp8,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,0,0.017242666333913803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,2,128,0,1,fp8,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,4,128,0,1,fp8,fp8,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,16,8,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,16,128,0,1,fp8,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,1,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,2,128,0,1,fp8,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,4,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,16,8,128,0,1,fp8,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,16,128,0,1,fp8,fp8,0,0.014869333555301031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,1,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,2,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,4,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,16,8,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,16,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,1,128,0,1,fp8,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,2,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,4,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,0,0.01488000030318896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,16,8,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,128,0,1,fp8,fp8,0,3.3594398498535156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,float16,0,4.146191914876302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,1,128,0,1,float16,fp8,0,4.307973225911458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,float16,0,4.2986345291137695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,128,0,1,fp8,fp8,0,3.3646186192830405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,2,128,0,1,float16,fp8,0,4.317173322041829
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,float16,0,4.301530520121257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,128,0,1,float16,fp8,0,4.301936149597168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,float16,0,2.1907893816630044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,128,0,1,float16,fp8,0,2.214421272277832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,12,4,128,0,1,fp8,fp8,0,3.3889172871907554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,12,128,0,1,fp8,fp8,0,1.8261653582255046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,float16,0,2.1201866467793784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,128,0,1,fp8,fp8,0,1.7523786226908367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,1,128,0,1,float16,fp8,0,2.169605255126953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,float16,0,2.12989870707194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,128,0,1,fp8,fp8,0,1.7483466466267903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,2,128,0,1,float16,fp8,0,2.1258346239725747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,float16,0,2.1474666595458984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,128,0,1,fp8,fp8,0,1.8975253105163574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,12,4,128,0,1,float16,fp8,0,2.1341066360473633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,float16,0,1.2343573570251465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,128,0,1,float16,fp8,0,1.1728479862213135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,12,128,0,1,fp8,fp8,0,1.0469653606414795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,float16,0,1.1345547040303547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,128,0,1,fp8,fp8,0,0.9519466559092203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,1,128,0,1,float16,fp8,0,1.1174720128377278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,float16,0,1.1404266357421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,128,0,1,float16,fp8,0,1.136362632115682
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,2,128,0,1,fp8,fp8,0,0.9533493518829346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,float16,0,1.1334559917449951
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,128,0,1,float16,fp8,0,1.1708266735076904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,float16,0,0.6709386507670084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,12,4,128,0,1,fp8,fp8,0,0.96396803855896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,128,0,1,float16,fp8,0,0.6715786457061768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,12,128,0,1,fp8,fp8,0,0.5763839880625407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,float16,0,0.6324746608734131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,128,0,1,fp8,fp8,0,0.5707840124766032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,1,128,0,1,float16,fp8,0,0.630677342414856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,float16,0,0.6458293199539185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,128,0,1,float16,fp8,0,0.6368106603622437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,2,128,0,1,fp8,fp8,0,0.5555093288421631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,float16,0,0.65447998046875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,128,0,1,float16,fp8,0,0.6570880015691122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,12,4,128,0,1,fp8,fp8,0,0.5610719919204712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,float16,0,2.43614927927653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,128,0,1,float16,fp8,0,2.4802986780802407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,1,128,0,1,fp8,fp8,0,2.0407892862955728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,float16,0,2.491205374399821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,128,0,1,fp8,fp8,0,2.0466133753458657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,2,128,0,1,float16,fp8,0,2.520869255065918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,float16,0,2.4969493548075357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,128,0,1,float16,fp8,0,2.5360107421875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,float16,0,1.3465280532836914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,128,0,1,float16,fp8,0,1.3780694007873535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,12,4,128,0,1,fp8,fp8,0,2.0723679860432944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,12,128,0,1,fp8,fp8,0,1.1341439882914226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,float16,0,1.2719093163808186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,128,0,1,float16,fp8,0,1.3037760257720947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,1,128,0,1,fp8,fp8,0,1.0759999752044678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,float16,0,1.2841920057932537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,128,0,1,fp8,fp8,0,1.080672025680542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,2,128,0,1,float16,fp8,0,1.284559965133667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,float16,0,1.308853308359782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,128,0,1,float16,fp8,0,1.3088213602701824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,float16,0,0.7623039881388346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,128,0,1,float16,fp8,0,0.7618559996287028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,12,4,128,0,1,fp8,fp8,0,1.096992015838623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,12,128,0,1,fp8,fp8,0,0.6262240012486776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,float16,0,0.7035893599192301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,128,0,1,fp8,fp8,0,0.5997493267059326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,1,128,0,1,float16,fp8,0,0.6935466925303141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,float16,0,0.7062719662984213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,128,0,1,float16,fp8,0,0.6999039649963379
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,2,128,0,1,fp8,fp8,0,0.6036479870478312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,float16,0,0.711738665898641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,128,0,1,float16,fp8,0,0.7172160148620605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,float16,0,0.43994665145874023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,128,0,1,float16,fp8,0,0.4424639940261841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,12,4,128,0,1,fp8,fp8,0,0.607205351193746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,12,128,0,1,fp8,fp8,0,0.37727999687194824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,float16,0,0.4033600091934204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,128,0,1,float16,fp8,0,0.40196800231933594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,1,128,0,1,fp8,fp8,0,0.3548693259557088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,float16,0,0.40148266156514484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,128,0,1,float16,fp8,0,0.40541334946950275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,2,128,0,1,fp8,fp8,0,0.35624531904856366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,float16,0,0.41021867593129474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,128,0,1,float16,fp8,0,0.4111200173695882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,12,4,128,0,1,fp8,fp8,0,0.36554133892059326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,float16,0,1.8093280792236328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,128,0,1,float16,fp8,0,1.7730347315470378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,1,128,0,1,fp8,fp8,0,1.4971787134806316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,float16,0,1.807653268178304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,128,0,1,fp8,fp8,0,1.5057652791341145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,2,128,0,1,float16,fp8,0,1.8313546180725098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,float16,0,1.8200213114420574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,128,0,1,float16,fp8,0,1.8609973589579265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,float16,0,1.0139626661936443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,128,0,1,float16,fp8,0,1.0384533405303955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,12,4,128,0,1,fp8,fp8,0,1.5267359415690105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,12,128,0,1,fp8,fp8,0,0.8972746531168619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,float16,0,0.9392426808675131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,128,0,1,fp8,fp8,0,0.8000266551971436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,1,128,0,1,float16,fp8,0,0.9441546599070231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,float16,0,0.9688586393992106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,128,0,1,float16,fp8,0,0.9592853387196859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,2,128,0,1,fp8,fp8,0,0.8033440113067627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,float16,0,0.9721173445383707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,128,0,1,float16,fp8,0,0.9720533688863119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,float16,0,0.5625919898351034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,12,4,128,0,1,fp8,fp8,0,0.8240799903869629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,128,0,1,float16,fp8,0,0.5767413377761841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,12,128,0,1,fp8,fp8,0,0.47937599817911786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,float16,0,0.5115360021591187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,128,0,1,fp8,fp8,0,0.4612213373184204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,1,128,0,1,float16,fp8,0,0.5162133375803629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,float16,0,0.518885334332784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,128,0,1,fp8,fp8,0,0.45980266729990643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,2,128,0,1,float16,fp8,0,0.5248426596323649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,float16,0,0.5290879805882772
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,128,0,1,fp8,fp8,0,0.4596213499704997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,12,4,128,0,1,float16,fp8,0,0.5350186824798584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,float16,0,0.33714667956034344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,128,0,1,float16,fp8,0,0.3409866491953532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,12,128,0,1,fp8,fp8,0,0.29044799009958905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,float16,0,0.30433066685994464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,128,0,1,fp8,fp8,0,0.27349867423375446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,1,128,0,1,float16,fp8,0,0.30772799253463745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,float16,0,0.3087093234062195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,128,0,1,fp8,fp8,0,0.2737119992574056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,2,128,0,1,float16,fp8,0,0.3085813323656718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,float16,0,0.31066666046778363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,128,0,1,float16,fp8,0,0.3118613362312317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,12,4,128,0,1,fp8,fp8,0,0.27940799792607623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,128,0,1,fp8,fp8,0,2.01200532913208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,float16,0,2.381125291188558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,1,128,0,1,float16,fp8,0,2.4058666229248047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,128,0,1,fp8,fp8,0,2.021968046824137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,float16,0,2.4170239766438804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,2,128,0,1,float16,fp8,0,2.4408532778422036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,float16,0,2.43340794245402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,float16,0,1.338688055674235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,128,0,1,fp8,fp8,0,2.0463573137919107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,128,0,1,float16,fp8,0,1.3824532826741536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,12,4,128,0,1,float16,fp8,0,2.459749380747477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,12,128,0,1,fp8,fp8,0,1.1498506863911946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,float16,0,1.2269333203633626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,128,0,1,fp8,fp8,0,1.067855993906657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,1,128,0,1,float16,fp8,0,1.2285172939300537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,float16,0,1.2264213562011719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,128,0,1,fp8,fp8,0,1.0475359757741292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,2,128,0,1,float16,fp8,0,1.2445440292358398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,float16,0,1.2552746931711833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,128,0,1,float16,fp8,0,1.2648106416066487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,float16,0,0.7211840152740479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,128,0,1,float16,fp8,0,0.7255626519521078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,12,4,128,0,1,fp8,fp8,0,1.0624693234761555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,12,128,0,1,fp8,fp8,0,0.6022506554921468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,float16,0,0.6510133345921835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,128,0,1,fp8,fp8,0,0.565285325050354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,1,128,0,1,float16,fp8,0,0.6530826489130656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,float16,0,0.6595413287480673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,128,0,1,float16,fp8,0,0.6632693211237589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,2,128,0,1,fp8,fp8,0,0.5682613452275594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,float16,0,0.6711359818776449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,float16,0,0.40538668632507324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,128,0,1,fp8,fp8,0,0.5745866696039835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,12,4,128,0,1,float16,fp8,0,0.6705333391825358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,128,0,1,float16,fp8,0,0.40968533356984455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,12,128,0,1,fp8,fp8,0,0.34718934694925946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,float16,0,0.3615786631902059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,128,0,1,float16,fp8,0,0.3615093231201172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,1,128,0,1,fp8,fp8,0,0.32421332597732544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,float16,0,0.36583999792734784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,128,0,1,float16,fp8,0,0.36633598804473877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,2,128,0,1,fp8,fp8,0,0.32716800769170123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,float16,0,0.37803200880686444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,128,0,1,float16,fp8,0,0.3803679943084717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,float16,0,0.2478613257408142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,12,4,128,0,1,fp8,fp8,0,0.3311840097109477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,128,0,1,float16,fp8,0,0.24759467442830405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,12,128,0,1,fp8,fp8,0,0.21692800521850586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,float16,0,0.22102399667104086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,128,0,1,float16,fp8,0,0.22393065690994263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,1,128,0,1,fp8,fp8,0,0.2012373407681783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,float16,0,0.22573333978652954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,128,0,1,float16,fp8,0,0.22381333510080972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,2,128,0,1,fp8,fp8,0,0.20143999656041464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,float16,0,0.22272533178329468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,128,0,1,float16,fp8,0,0.22627200682957968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,12,4,128,0,1,fp8,fp8,0,0.20298133293787637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,float16,0,1.479423999786377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,128,0,1,fp8,fp8,0,1.2660266558329265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,1,128,0,1,float16,fp8,0,1.4850880304972331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,float16,0,1.4842079480489094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,128,0,1,fp8,fp8,0,1.27401598294576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,2,128,0,1,float16,fp8,0,1.5064266522725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,float16,0,1.5354773203531902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,128,0,1,float16,fp8,0,1.5231787363688152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,float16,0,0.8830080032348633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,128,0,1,float16,fp8,0,0.8685653209686279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,12,128,0,1,fp8,fp8,0,0.7417866388956705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,12,4,128,0,1,fp8,fp8,0,1.29749329884847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,float16,0,0.769765297571818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,128,0,1,float16,fp8,0,0.7784319718678793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,1,128,0,1,fp8,fp8,0,0.6904906431833903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,float16,0,0.7825173536936442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,128,0,1,float16,fp8,0,0.7989706993103027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,2,128,0,1,fp8,fp8,0,0.6694773038228353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,float16,0,0.7883040110270182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,128,0,1,float16,fp8,0,0.8005759716033936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,float16,0,0.46767465273539227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,128,0,1,fp8,fp8,0,0.40009065469106037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,12,128,0,1,float16,fp8,0,0.47014399369557697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,12,4,128,0,1,fp8,fp8,0,0.6793546676635742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,float16,0,0.4156800111134847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,128,0,1,fp8,fp8,0,0.3661866585413615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,1,128,0,1,float16,fp8,0,0.4148373206456502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,128,0,1,fp8,fp8,0,0.36994131406148273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,float16,0,0.42270398139953613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,2,128,0,1,float16,fp8,0,0.42293866475423175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,128,0,1,fp8,fp8,0,0.3760266701380412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,float16,0,0.4304320017496745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,12,4,128,0,1,float16,fp8,0,0.4354986747105916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,128,0,1,fp8,fp8,0,0.2346880038579305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,fp8,0,0.27478400866190594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,12,128,0,1,float16,float16,0,0.2733653386433919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,float16,0,0.23781333367029825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,128,0,1,fp8,fp8,0,0.21253333489100137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,1,128,0,1,float16,fp8,0,0.23656533161799112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,float16,0,0.2402133345603943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,128,0,1,fp8,fp8,0,0.21619733174641928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,2,128,0,1,float16,fp8,0,0.24011733134587607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,float16,0,0.2450666626294454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,128,0,1,fp8,fp8,0,0.22313600778579712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,12,4,128,0,1,float16,fp8,0,0.24421334266662598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,float16,0,0.16296533743540445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,128,0,1,float16,fp8,0,0.1639306644598643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,12,128,0,1,fp8,fp8,0,0.15043733517328897
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,float16,0,0.15474133690198263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,128,0,1,fp8,fp8,0,0.14220800002415976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,1,128,0,1,float16,fp8,0,0.1548960010210673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,float16,0,0.1565013329188029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,128,0,1,float16,fp8,0,0.15678399801254272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,2,128,0,1,fp8,fp8,0,0.1437279979387919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,float16,0,0.15795200069745383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,128,0,1,fp8,fp8,0,0.14402666687965393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,12,4,128,0,1,float16,fp8,0,0.15757866700490317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,128,0,1,fp8,fp8,0,1.333408037821452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,float16,0,1.5458240509033203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,1,128,0,1,float16,fp8,0,1.5461494127909343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,float16,0,1.550015926361084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,128,0,1,float16,fp8,0,1.5681066513061523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,2,128,0,1,fp8,fp8,0,1.3461707433064778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,float16,0,1.5962665875752766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,float16,0,0.8967253367106119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,128,0,1,float16,fp8,0,0.9054880142211914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,128,0,1,fp8,fp8,0,1.3722346623738606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,12,4,128,0,1,float16,fp8,0,1.5927519798278809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,12,128,0,1,fp8,fp8,0,0.7704853216807047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,float16,0,0.7883306344350179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,128,0,1,float16,fp8,0,0.7936480045318604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,1,128,0,1,fp8,fp8,0,0.7358026504516602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,float16,0,0.7983787059783936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,128,0,1,float16,fp8,0,0.8216426372528076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,2,128,0,1,fp8,fp8,0,0.6908426284790039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,float16,0,0.8524746894836426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,float16,0,0.477509339650472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,128,0,1,fp8,fp8,0,0.7066453297932943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,12,4,128,0,1,float16,fp8,0,0.8213919798533121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,128,0,1,float16,fp8,0,0.48583467801411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,12,128,0,1,fp8,fp8,0,0.41179200013478595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,float16,0,0.41843732198079425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,128,0,1,float16,fp8,0,0.41883734862009686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,1,128,0,1,fp8,fp8,0,0.3676160176595052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,float16,0,0.4238773187001546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,128,0,1,float16,fp8,0,0.42479999860127765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,2,128,0,1,fp8,fp8,0,0.37132267157236737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,float16,0,0.430400013923645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,128,0,1,float16,fp8,0,0.4363413254419963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,12,4,128,0,1,fp8,fp8,0,0.3779999812444051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,float16,0,0.2662559946378072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,128,0,1,float16,fp8,0,0.2680799961090088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,12,128,0,1,fp8,fp8,0,0.231605331103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,float16,0,0.22691200176874796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,128,0,1,float16,fp8,0,0.22994667291641235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,1,128,0,1,fp8,fp8,0,0.20863999923070273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,float16,0,0.2318293253580729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,128,0,1,float16,fp8,0,0.23368000984191895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,2,128,0,1,fp8,fp8,0,0.2104319930076599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,float16,0,0.23911466201146445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,128,0,1,float16,fp8,0,0.2434879938761393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,float16,0,0.15577600399653116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,12,4,128,0,1,fp8,fp8,0,0.21541333198547363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,128,0,1,float16,fp8,0,0.1556426684061686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,12,128,0,1,fp8,fp8,0,0.14029866456985474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,float16,0,0.13686933120091757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,128,0,1,float16,fp8,0,0.13916266957918802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,1,128,0,1,fp8,fp8,0,0.12381866574287415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,float16,0,0.1404213309288025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,128,0,1,fp8,fp8,0,0.1251466671625773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,2,128,0,1,float16,fp8,0,0.13779733578364053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,float16,0,0.1383039951324463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,128,0,1,float16,fp8,0,0.14246400197347006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,float16,0,0.09868266185124715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,12,4,128,0,1,fp8,fp8,0,0.12833600242932638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,128,0,1,float16,fp8,0,0.09944533308347066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,12,128,0,1,fp8,fp8,0,0.09089600046475728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,128,0,1,fp8,fp8,0,0.08909866213798523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,float16,0,0.09520533680915833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,1,128,0,1,float16,fp8,0,0.09495466947555542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,float16,0,0.0953546663125356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,128,0,1,float16,fp8,0,0.09556800127029419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,2,128,0,1,fp8,fp8,0,0.08866666754086812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,float16,0,0.09690666198730469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,128,0,1,float16,fp8,0,0.09515733520189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,12,4,128,0,1,fp8,fp8,0,0.08886399865150452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,float16,0,1.0074506600697835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,128,0,1,fp8,fp8,0,0.8788213729858398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,1,128,0,1,float16,fp8,0,1.0013226668039958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,float16,0,1.0176959832509358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,128,0,1,fp8,fp8,0,0.8885440031687418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,2,128,0,1,float16,fp8,0,1.02129069964091
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,float16,0,1.0455679893493652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,128,0,1,fp8,fp8,0,0.9079626401265463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,12,4,128,0,1,float16,fp8,0,1.0472746690114338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,float16,0,0.5932960112889608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,128,0,1,fp8,fp8,0,0.515450676282247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,12,128,0,1,float16,fp8,0,0.5987626711527506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,float16,0,0.5199946562449137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,128,0,1,fp8,fp8,0,0.4569386641184489
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,1,128,0,1,float16,fp8,0,0.5168906847635905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,128,0,1,fp8,fp8,0,0.46051732699076336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,float16,0,0.5261439879735311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,2,128,0,1,float16,fp8,0,0.5290986696879069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,128,0,1,fp8,fp8,0,0.4707039992014567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,float16,0,0.538426677385966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,12,4,128,0,1,float16,fp8,0,0.5476640065511068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,float16,0,0.31903467575709027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,128,0,1,fp8,fp8,0,0.2794026732444763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,12,128,0,1,float16,fp8,0,0.324346661567688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,float16,0,0.2783679962158203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,128,0,1,fp8,fp8,0,0.24901866912841797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,1,128,0,1,float16,fp8,0,0.27821866671244305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,float16,0,0.281056006749471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,128,0,1,float16,fp8,0,0.28410667181015015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,2,128,0,1,fp8,fp8,0,0.2512106696764628
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,float16,0,0.2906720042228699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,128,0,1,float16,fp8,0,0.2932213346163432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,12,4,128,0,1,fp8,fp8,0,0.257258673508962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,float16,0,0.18264534076054892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,128,0,1,float16,fp8,0,0.1856266657511393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,12,128,0,1,fp8,fp8,0,0.1612320045630137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,float16,0,0.15269866585731506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,128,0,1,float16,fp8,0,0.15427199999491373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,1,128,0,1,fp8,fp8,0,0.13989333311716715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,float16,0,0.15454933047294617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,128,0,1,float16,fp8,0,0.15588266650835672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,2,128,0,1,fp8,fp8,0,0.14415466785430908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,fp8,0,0.16085867087046304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,128,0,1,float16,float16,0,0.16230400403340658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,12,4,128,0,1,fp8,fp8,0,0.1482186714808146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,float16,0,0.10545066992441814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,128,0,1,float16,fp8,0,0.10577066739400227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,12,128,0,1,fp8,fp8,0,0.10149332880973816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,float16,0,0.09924800197283427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,128,0,1,float16,fp8,0,0.10018133123715718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,1,128,0,1,fp8,fp8,0,0.08913600444793701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,float16,0,0.09915199875831604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,128,0,1,float16,fp8,0,0.10012267033259074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,2,128,0,1,fp8,fp8,0,0.09118400017420451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,float16,0,0.10033067067464192
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,128,0,1,float16,fp8,0,0.10051733255386353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,12,4,128,0,1,fp8,fp8,0,0.09200533231099446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,float16,0,0.06830933193365733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,128,0,1,float16,fp8,0,0.0689386675755183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,12,128,0,1,fp8,fp8,0,0.0642133355140686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,float16,0,0.06647466619809468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,128,0,1,float16,fp8,0,0.06654400130112965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,1,128,0,1,fp8,fp8,0,0.0621013343334198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,float16,0,0.06642666459083557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,128,0,1,float16,fp8,0,0.06700799862543742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,2,128,0,1,fp8,fp8,0,0.062447999914487205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,float16,0,0.06632000207901001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,128,0,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,12,4,128,0,1,fp8,fp8,0,0.06316799918810527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,float16,0,1.1269226868947346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,128,0,1,fp8,fp8,0,0.9858667055765787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,1,128,0,1,float16,fp8,0,1.124783992767334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,float16,0,1.1470346450805664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,128,0,1,fp8,fp8,0,1.000165303548177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,2,128,0,1,float16,fp8,0,1.1484159628550212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,float16,0,1.1724533240000408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,128,0,1,float16,fp8,0,1.1850879987080891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,float16,0,0.6579999923706055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,128,0,1,float16,fp8,0,0.6648586591084799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,12,128,0,1,fp8,fp8,0,0.5805493195851644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,12,4,128,0,1,fp8,fp8,0,1.0225813388824463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,float16,0,0.570746660232544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,128,0,1,fp8,fp8,0,0.5070826609929403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,1,128,0,1,float16,fp8,0,0.5738293329874674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,float16,0,0.5802026589711508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,128,0,1,float16,fp8,0,0.5845066706339518
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,2,128,0,1,fp8,fp8,0,0.5128213167190552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,float16,0,0.5990133285522461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,128,0,1,float16,fp8,0,0.6063520113627116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,float16,0,0.3476639986038208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,12,4,128,0,1,fp8,fp8,0,0.5271039803822836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,128,0,1,float16,fp8,0,0.35309867064158124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,12,128,0,1,fp8,fp8,0,0.3088746666908264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,float16,0,0.3019040028254191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,128,0,1,float16,fp8,0,0.302784005800883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,1,128,0,1,fp8,fp8,0,0.27107733488082886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,float16,0,0.3078026572863261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,128,0,1,float16,fp8,0,0.30744532744089764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,2,128,0,1,fp8,fp8,0,0.27346134185791016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,float16,0,0.31651200850804645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,128,0,1,float16,fp8,0,0.32029332717259723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,12,4,128,0,1,fp8,fp8,0,0.2797546585400899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,float16,0,0.19283199310302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,128,0,1,float16,fp8,0,0.19641600052515665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,12,128,0,1,fp8,fp8,0,0.17277334133783975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,float16,0,0.16142400105794272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,128,0,1,float16,fp8,0,0.16400532921155295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,1,128,0,1,fp8,fp8,0,0.15076800187428793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,float16,0,0.1650879979133606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,128,0,1,float16,fp8,0,0.1677280068397522
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,2,128,0,1,fp8,fp8,0,0.15268266201019287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,float16,0,0.17392534017562866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,128,0,1,float16,fp8,0,0.17616534233093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,12,4,128,0,1,fp8,fp8,0,0.15620799859364828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,float16,0,0.11153599619865417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,128,0,1,float16,fp8,0,0.11344533165295918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,12,128,0,1,fp8,fp8,0,0.10132267077763875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,float16,0,0.0969493289788564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,128,0,1,float16,fp8,0,0.09683199723561604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,1,128,0,1,fp8,fp8,0,0.08714666962623596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,fp8,0,0.09708799918492635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,128,0,1,float16,float16,0,0.09755200147628784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,2,128,0,1,fp8,fp8,0,0.08753066261609395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,float16,0,0.09726933638254802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,128,0,1,float16,fp8,0,0.0988159974416097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,12,4,128,0,1,fp8,fp8,0,0.09032533566157024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,float16,0,0.0684746652841568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,128,0,1,float16,fp8,0,0.07034666836261749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,12,128,0,1,fp8,fp8,0,0.06423999865849812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,float16,0,0.06746133168538411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,128,0,1,float16,fp8,0,0.06818133095900218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,1,128,0,1,fp8,fp8,0,0.06035199761390686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,float16,0,0.06782933572928111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,128,0,1,float16,fp8,0,0.06866133213043213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,2,128,0,1,fp8,fp8,0,0.06036800146102905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,float16,0,0.0664213349421819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,128,0,1,float16,fp8,0,0.06783466537793477
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,float16,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,12,4,128,0,1,fp8,fp8,0,0.062021334966023765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,128,0,1,float16,fp8,0,0.052005335688591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,12,128,0,1,fp8,fp8,0,0.048325334986050926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,float16,0,0.049914668003718056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,128,0,1,float16,fp8,0,0.05012266834576925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,1,128,0,1,fp8,fp8,0,0.047541335225105286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,float16,0,0.04976533353328705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,128,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,2,128,0,1,fp8,fp8,0,0.04711999992529551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,float16,0,0.050106664498647056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,128,0,1,float16,fp8,0,0.05148266752560934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,12,4,128,0,1,fp8,fp8,0,0.04784533381462097
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,float16,0,0.7995893160502116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,128,0,1,fp8,fp8,0,0.70742400487264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,1,128,0,1,float16,fp8,0,0.8095306555430094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,float16,0,0.817301352818807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,128,0,1,float16,fp8,0,0.8215839862823486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,2,128,0,1,fp8,fp8,0,0.7182239691416422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,float16,0,0.4713386694590251
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,float16,0,0.8379680315653483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,128,0,1,fp8,fp8,0,0.7287093003590902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,12,4,128,0,1,float16,fp8,0,0.8343146642049154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,128,0,1,float16,fp8,0,0.4738239844640096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,12,128,0,1,fp8,fp8,0,0.41779200236002606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,float16,0,0.4052533308664958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,128,0,1,float16,fp8,0,0.4054986635843913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,1,128,0,1,fp8,fp8,0,0.3600533405939738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,float16,0,0.41148801644643146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,128,0,1,float16,fp8,0,0.41385066509246826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,2,128,0,1,fp8,fp8,0,0.3638720115025838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,float16,0,0.4232586622238159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,float16,0,0.25147199630737305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,128,0,1,fp8,fp8,0,0.37582401434580487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,12,4,128,0,1,float16,fp8,0,0.42909332116444904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,128,0,1,float16,fp8,0,0.2524320085843404
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,12,128,0,1,fp8,fp8,0,0.22233599424362183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,float16,0,0.2145813306172689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,128,0,1,float16,fp8,0,0.2161173423131307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,1,128,0,1,fp8,fp8,0,0.19556266069412231
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,fp8,0,0.22005333503087363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,128,0,1,float16,float16,0,0.21725332736968994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,2,128,0,1,fp8,fp8,0,0.19620800018310547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,float16,0,0.22707732518513998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,128,0,1,float16,fp8,0,0.22734399636586508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,float16,0,0.14010133345921835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,12,4,128,0,1,fp8,fp8,0,0.2015413244565328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,128,0,1,float16,fp8,0,0.14226667086283365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,12,128,0,1,fp8,fp8,0,0.12552000085512796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,float16,0,0.11403733491897583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,128,0,1,float16,fp8,0,0.11577600240707397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,1,128,0,1,fp8,fp8,0,0.10364799698193868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,float16,0,0.11717333396275838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,128,0,1,float16,fp8,0,0.11608533064524333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,2,128,0,1,fp8,fp8,0,0.10941867033640544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,float16,0,0.12345066666603088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,128,0,1,float16,fp8,0,0.12382400035858154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,12,4,128,0,1,fp8,fp8,0,0.11427199840545654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,float16,0,0.07914666831493378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,128,0,1,float16,fp8,0,0.08102933565775554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,12,128,0,1,fp8,fp8,0,0.0766133318344752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,float16,0,0.07070933282375336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,128,0,1,float16,fp8,0,0.072543998559316
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,1,128,0,1,fp8,fp8,0,0.06440000236034393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,float16,0,0.07107733190059662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,128,0,1,fp8,fp8,0,0.06452799836794536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,2,128,0,1,float16,fp8,0,0.0727893312772115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,float16,0,0.07133866846561432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,128,0,1,float16,fp8,0,0.07286933561166127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,12,4,128,0,1,fp8,fp8,0,0.0665226678053538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,float16,0,0.0479360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,128,0,1,float16,fp8,0,0.05026666820049286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,12,128,0,1,fp8,fp8,0,0.046069333950678505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,float16,0,0.047877331574757896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,128,0,1,float16,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,1,128,0,1,fp8,fp8,0,0.043968002001444496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,float16,0,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,128,0,1,float16,fp8,0,0.04806933303674062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,2,128,0,1,fp8,fp8,0,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,float16,0,0.04789866507053375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,128,0,1,float16,fp8,0,0.048250665267308555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,12,4,128,0,1,fp8,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,float16,0,0.041840001940727234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,128,0,1,float16,fp8,0,0.043525333205858864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,12,128,0,1,fp8,fp8,0,0.039834665755430855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,fp8,0,0.04192533095677694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,1,128,0,1,fp8,fp8,0,0.03812800099452337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,float16,0,0.0418453315893809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,128,0,1,float16,fp8,0,0.04161600023508072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,float16,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,2,128,0,1,fp8,fp8,0,0.03808533400297165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,128,0,1,float16,fp8,0,0.0421973317861557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,12,4,128,0,1,fp8,fp8,0,0.04009066770474116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,float16,0,0.8309439818064371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,128,0,1,float16,fp8,0,0.8286080360412598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,1,128,0,1,fp8,fp8,0,0.7928533554077148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,float16,0,0.8549813429514567
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,128,0,1,float16,fp8,0,0.8520692984263102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,2,128,0,1,fp8,fp8,0,0.8109920024871826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,128,0,1,fp8,fp8,0,0.8273226420084635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,float16,0,0.8736266295115153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,12,4,128,0,1,float16,fp8,0,0.8666186332702637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,float16,0,0.49831998348236084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,128,0,1,float16,fp8,0,0.4886080026626587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,12,128,0,1,fp8,fp8,0,0.4696426788965861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,float16,0,0.42643733819325763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,128,0,1,float16,fp8,0,0.42604267597198486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,1,128,0,1,fp8,fp8,0,0.4068479935328166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,float16,0,0.43748799959818524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,128,0,1,float16,fp8,0,0.43906132380167645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,2,128,0,1,fp8,fp8,0,0.4161440134048462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,float16,0,0.449562668800354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,128,0,1,float16,fp8,0,0.4459199905395508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,12,4,128,0,1,fp8,fp8,0,0.4265120029449463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,float16,0,0.26497600475947064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,128,0,1,float16,fp8,0,0.2588319977124532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,12,128,0,1,fp8,fp8,0,0.24743467569351196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,fp8,0,0.22321067253748575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,128,0,1,float16,float16,0,0.22221867243448892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,1,128,0,1,fp8,fp8,0,0.2104319930076599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,float16,0,0.2305226723353068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,128,0,1,float16,fp8,0,0.22895467281341553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,2,128,0,1,fp8,fp8,0,0.21876800060272217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,float16,0,0.23547732830047607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,128,0,1,float16,fp8,0,0.23434666792551676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,float16,0,0.14444266756375632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,12,4,128,0,1,fp8,fp8,0,0.2214826742808024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,128,0,1,float16,fp8,0,0.14151466886202493
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,12,128,0,1,fp8,fp8,0,0.13539733489354452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,float16,0,0.11957866946856181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,128,0,1,float16,fp8,0,0.11970667044321696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,1,128,0,1,fp8,fp8,0,0.11189333597819011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,float16,0,0.1237546702226003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,128,0,1,float16,fp8,0,0.12411733468373616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,2,128,0,1,fp8,fp8,0,0.12012267112731934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,float16,0,0.12902399897575378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,128,0,1,float16,fp8,0,0.12686933080355325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,float16,0,0.0839519997437795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,12,4,128,0,1,fp8,fp8,0,0.12275200088818868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,128,0,1,float16,fp8,0,0.08203733464082082
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,12,128,0,1,fp8,fp8,0,0.07941866914431255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,float16,0,0.0724480003118515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,128,0,1,float16,fp8,0,0.07130133112271626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,1,128,0,1,fp8,fp8,0,0.06426666676998138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,float16,0,0.07242133220036824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,128,0,1,float16,fp8,0,0.07247466842333476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,2,128,0,1,fp8,fp8,0,0.06398400167624156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,float16,0,0.07312533259391785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,128,0,1,fp8,fp8,0,0.06790400048096974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,12,4,128,0,1,float16,fp8,0,0.07263466715812683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,float16,0,0.05018133421738943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,128,0,1,fp8,fp8,0,0.04580800235271454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,12,128,0,1,float16,fp8,0,0.050240000089009605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,float16,0,0.047781333327293396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,128,0,1,float16,fp8,0,0.04841599861780802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,1,128,0,1,fp8,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,float16,0,0.04786133269468943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,128,0,1,float16,fp8,0,0.04796266555786133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,2,128,0,1,fp8,fp8,0,0.04404266675313314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,float16,0,0.04941866795221964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,128,0,1,float16,fp8,0,0.04864533245563507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,12,4,128,0,1,fp8,fp8,0,0.04403733213742574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,float16,0,0.035829332967599235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,128,0,1,float16,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,12,128,0,1,fp8,fp8,0,0.033557333052158356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,float16,0,0.033402666449546814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,128,0,1,float16,fp8,0,0.034874667723973594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,1,128,0,1,fp8,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,float16,0,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,128,0,1,float16,fp8,0,0.03497066597143809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,2,128,0,1,fp8,fp8,0,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,float16,0,0.03397866586844126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,128,0,1,float16,fp8,0,0.03389333436886469
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,12,4,128,0,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,float16,0,0.03276266654332479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,128,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,12,128,0,1,fp8,fp8,0,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,float16,0,0.03150933235883713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,float16,0,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,128,0,1,fp8,fp8,0,0.02942399928967158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,1,128,0,1,float16,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,128,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,2,128,0,1,fp8,fp8,0,0.029722665747006733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,float16,0,0.031301334500312805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,128,0,1,float16,fp8,0,0.031530665854612984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,12,4,128,0,1,fp8,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,128,0,1,float16,float16,0,0.72707732518514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,128,0,1,float16,fp8,0,0.7216746807098389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,1,128,0,1,fp8,fp8,0,0.7013280391693115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,128,0,1,float16,float16,0,0.7444640000661215
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,128,0,1,float16,fp8,0,0.7401973406473795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,2,128,0,1,fp8,fp8,0,0.7180000146230062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,128,0,1,float16,float16,0,0.7626453240712484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,128,0,1,float16,fp8,0,0.7537333170572916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,12,4,128,0,1,fp8,fp8,0,0.7417439619700114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,128,0,1,float16,fp8,0,0.4309759934743245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,128,0,1,float16,float16,0,0.44147201379140216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,12,128,0,1,fp8,fp8,0,0.42317867279052734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,128,0,1,float16,float16,0,0.37221332391103107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,128,0,1,float16,fp8,0,0.37322668234507245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,1,128,0,1,fp8,fp8,0,0.3596479892730713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,128,0,1,float16,float16,0,0.3813759883244832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,128,0,1,float16,fp8,0,0.3790293137232463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,2,128,0,1,fp8,fp8,0,0.3686559995015462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,128,0,1,float16,float16,0,0.3930346568425496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,128,0,1,float16,float16,0,0.23333332935969034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,128,0,1,float16,fp8,0,0.3881973425547282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,12,4,128,0,1,fp8,fp8,0,0.3762400150299072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,128,0,1,fp8,fp8,0,0.22244799137115479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,12,128,0,1,float16,fp8,0,0.22884267568588257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,128,0,1,float16,float16,0,0.19403733809789023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,128,0,1,float16,fp8,0,0.19319466749827066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,1,128,0,1,fp8,fp8,0,0.1857759952545166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,128,0,1,float16,float16,0,0.20267732938130698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,128,0,1,float16,fp8,0,0.19993066787719727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,2,128,0,1,fp8,fp8,0,0.19245866934458414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,128,0,1,float16,float16,0,0.2070080041885376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,128,0,1,float16,float16,0,0.1279253363609314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,128,0,1,float16,fp8,0,0.20322666565577188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,12,4,128,0,1,fp8,fp8,0,0.19799999396006265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,128,0,1,float16,fp8,0,0.1257866621017456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,12,128,0,1,fp8,fp8,0,0.12111999591191609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,128,0,1,float16,float16,0,0.10539199908574422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,128,0,1,float16,fp8,0,0.10471466183662415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,1,128,0,1,fp8,fp8,0,0.09713066617647807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,128,0,1,float16,fp8,0,0.10770133137702942
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,128,0,1,float16,float16,0,0.10842667023340861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,2,128,0,1,fp8,fp8,0,0.10403733452161153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,128,0,1,float16,float16,0,0.11171199878056844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,128,0,1,float16,fp8,0,0.11141332983970642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,12,4,128,0,1,fp8,fp8,0,0.10769599676132202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,128,0,1,float16,float16,0,0.07326933244864146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,128,0,1,float16,fp8,0,0.07040533423423767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,12,128,0,1,fp8,fp8,0,0.07172266642252605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,128,0,1,float16,float16,0,0.06380266447861989
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,128,0,1,float16,fp8,0,0.06447466711203258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,1,128,0,1,fp8,fp8,0,0.055589333176612854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,128,0,1,float16,float16,0,0.06260799864927928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,128,0,1,float16,fp8,0,0.0642080008983612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,2,128,0,1,fp8,fp8,0,0.05657599866390228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,128,0,1,float16,float16,0,0.06311466793219249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,128,0,1,float16,fp8,0,0.06517866750558217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,12,4,128,0,1,fp8,fp8,0,0.05913599828879038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,128,0,1,float16,float16,0,0.04497066636880239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,128,0,1,float16,fp8,0,0.04553600152333578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,12,128,0,1,fp8,fp8,0,0.0417546679576238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,128,0,1,float16,fp8,0,0.04344533383846283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,1,128,0,1,fp8,fp8,0,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,128,0,1,float16,float16,0,0.04380266865094503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,128,0,1,float16,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,2,128,0,1,fp8,fp8,0,0.03782933453718821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,128,0,1,float16,float16,0,0.04197333256403605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,128,0,1,float16,fp8,0,0.04384533564249674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,12,4,128,0,1,fp8,fp8,0,0.03752533346414566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,128,0,1,float16,float16,0,0.030479999879995983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,128,0,1,float16,fp8,0,0.03105599929889043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,12,128,0,1,fp8,fp8,0,0.029189333319664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,128,0,1,float16,float16,0,0.02939733366171519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,128,0,1,float16,fp8,0,0.029498666524887085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,1,128,0,1,fp8,fp8,0,0.027280000348885853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,128,0,1,float16,float16,0,0.029333333174387615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,128,0,1,fp8,fp8,0,0.02770666778087616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,2,128,0,1,float16,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,128,0,1,float16,float16,0,0.029711998999118805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,128,0,1,float16,fp8,0,0.031018666923046112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,12,4,128,0,1,fp8,fp8,0,0.027477333943049114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,128,0,1,float16,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,12,128,0,1,fp8,fp8,0,0.025434667865435284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,128,0,1,float16,float16,0,0.025461333493391674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,128,0,1,float16,fp8,0,0.02526933451493581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,1,128,0,1,fp8,fp8,0,0.023770667612552643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,128,0,1,float16,float16,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,128,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,2,128,0,1,fp8,fp8,0,0.023445333043734234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,128,0,1,float16,float16,0,0.027034667630990345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,12,4,128,0,1,fp8,fp8,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,128,0,1,float16,float16,0,0.025216000775496166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,128,0,1,float16,fp8,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,12,128,0,1,fp8,fp8,0,0.02125866711139679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,128,0,1,float16,float16,0,0.02369600037733714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,128,0,1,float16,fp8,0,0.02325333406527837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,1,128,0,1,fp8,fp8,0,0.02126399924357732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,128,0,1,float16,float16,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,128,0,1,float16,fp8,0,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,2,128,0,1,fp8,fp8,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,128,0,1,float16,float16,0,0.023685333629449207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,12,4,128,0,1,fp8,fp8,0,0.02103466788927714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,128,0,1,float16,float16,0,0.3391253153483073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,128,0,1,float16,fp8,0,0.33878934383392334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,1,128,0,1,fp8,fp8,0,0.3354826768239339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,128,0,1,float16,float16,0,0.34855465094248456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,128,0,1,float16,fp8,0,0.34650135040283203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,2,128,0,1,fp8,fp8,0,0.3431146542231242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,128,0,1,float16,float16,0,0.356879989306132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,128,0,1,float16,fp8,0,0.35366400082906085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,12,4,128,0,1,fp8,fp8,0,0.3505760033925374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,128,0,1,float16,float16,0,0.21425066391626993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,128,0,1,float16,fp8,0,0.20804800589879355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,12,128,0,1,fp8,fp8,0,0.20827732483545938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,128,0,1,float16,float16,0,0.17830399672190347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,128,0,1,float16,fp8,0,0.17712533473968506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,1,128,0,1,fp8,fp8,0,0.17329599459966025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,128,0,1,float16,float16,0,0.1840426723162333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,128,0,1,float16,fp8,0,0.18266665935516357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,2,128,0,1,fp8,fp8,0,0.1819466749827067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,128,0,1,float16,float16,0,0.18736000855763754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,128,0,1,float16,fp8,0,0.18535999457041422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,12,4,128,0,1,fp8,fp8,0,0.1857973337173462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,128,0,1,float16,float16,0,0.11885333061218262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,128,0,1,float16,fp8,0,0.11607999602953593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,12,128,0,1,fp8,fp8,0,0.11432000001271565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,128,0,1,float16,float16,0,0.09965866804122925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,128,0,1,float16,fp8,0,0.09868266185124715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,1,128,0,1,fp8,fp8,0,0.09125333031018575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,128,0,1,float16,float16,0,0.10211199522018433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,128,0,1,float16,fp8,0,0.10139200091362
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,128,0,1,float16,fp8,0,0.10432533423105876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,128,0,1,float16,float16,0,0.10517332951227824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,2,128,0,1,fp8,fp8,0,0.10020800431569417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,12,4,128,0,1,fp8,fp8,0,0.10136533776919048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,128,0,1,float16,float16,0,0.0662773350874583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,128,0,1,float16,fp8,0,0.06520533561706543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,12,128,0,1,fp8,fp8,0,0.06745066742102306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,128,0,1,float16,float16,0,0.057999998331069946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,128,0,1,float16,fp8,0,0.05608533322811127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,1,128,0,1,fp8,fp8,0,0.05026133358478546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,128,0,1,float16,fp8,0,0.05774400134881338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,128,0,1,float16,float16,0,0.05949333310127258
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,2,128,0,1,fp8,fp8,0,0.052000001072883606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,128,0,1,float16,float16,0,0.0583840012550354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,128,0,1,float16,fp8,0,0.05809600154558817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,128,0,1,float16,float16,0,0.04195199906826019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,12,4,128,0,1,fp8,fp8,0,0.05525333185990652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,128,0,1,float16,fp8,0,0.04185600082079569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,128,0,1,float16,float16,0,0.039893334110577904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,12,128,0,1,fp8,fp8,0,0.037808001041412354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,128,0,1,fp8,fp8,0,0.03536533315976461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,1,128,0,1,float16,fp8,0,0.04001066585381826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,128,0,1,float16,float16,0,0.03956799954175949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,128,0,1,float16,fp8,0,0.040031999349594116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,2,128,0,1,fp8,fp8,0,0.036159999668598175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,128,0,1,float16,float16,0,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,128,0,1,float16,fp8,0,0.04012800008058548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,12,4,128,0,1,fp8,fp8,0,0.03540800015131632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,128,0,1,float16,float16,0,0.029866665601730347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,128,0,1,float16,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,12,128,0,1,fp8,fp8,0,0.027999999622503918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,128,0,1,float16,float16,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,128,0,1,float16,fp8,0,0.028170667588710785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,128,0,1,float16,float16,0,0.0271573339899381
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,1,128,0,1,fp8,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,128,0,1,float16,fp8,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,2,128,0,1,fp8,fp8,0,0.025621332228183746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,128,0,1,float16,float16,0,0.027855999767780304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,128,0,1,float16,fp8,0,0.028778667251269024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,12,4,128,0,1,fp8,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,128,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,128,0,1,float16,fp8,0,0.02550400048494339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,12,128,0,1,fp8,fp8,0,0.023333333432674408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,128,0,1,float16,float16,0,0.023584000766277313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,128,0,1,float16,fp8,0,0.02508266766866048
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,1,128,0,1,fp8,fp8,0,0.023071999351183575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,128,0,1,float16,float16,0,0.023658665517965954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,2,128,0,1,fp8,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,128,0,1,float16,fp8,0,0.024517332514127094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,12,4,128,0,1,fp8,fp8,0,0.02290133386850357
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,128,0,1,float16,float16,0,0.023045333723227184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,128,0,1,float16,fp8,0,0.021290667355060577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,12,128,0,1,fp8,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,128,0,1,float16,float16,0,0.021722666919231415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,1,128,0,1,float16,fp8,0,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,128,0,1,float16,float16,0,0.02146133283774058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,128,0,1,float16,float16,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,2,128,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,12,4,128,0,1,float16,fp8,0,0.021397332350413006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,128,0,1,float16,float16,0,0.021525333325068157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,128,0,1,float16,fp8,0,0.020997333029905956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,12,128,0,1,fp8,fp8,0,0.017466666797796886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,128,0,1,float16,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,1,128,0,1,fp8,fp8,0,0.018415999909241993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,128,0,1,float16,float16,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,128,0,1,float16,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,2,128,0,1,fp8,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,128,0,1,float16,float16,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,12,4,128,0,1,fp8,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,128,0,1,float16,fp8,0,0.1904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,128,0,1,float16,float16,0,0.19115734100341797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,1,128,0,1,fp8,fp8,0,0.18659200270970663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,128,0,1,float16,float16,0,0.19608000914255777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,128,0,1,float16,fp8,0,0.1962613264719645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,2,128,0,1,fp8,fp8,0,0.19453332821528116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,128,0,1,float16,float16,0,0.19902400175730386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,128,0,1,float16,fp8,0,0.19775466124216715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,12,4,128,0,1,fp8,fp8,0,0.19552532831827799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,128,0,1,float16,float16,0,0.11952533324559529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,128,0,1,float16,fp8,0,0.11776000261306763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,12,128,0,1,fp8,fp8,0,0.119759996732076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,128,0,1,float16,float16,0,0.10426666339238484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,128,0,1,float16,fp8,0,0.10354666908582051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,1,128,0,1,fp8,fp8,0,0.09706133604049683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,128,0,1,float16,float16,0,0.10648533701896667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,128,0,1,float16,fp8,0,0.10546132922172546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,2,128,0,1,fp8,fp8,0,0.10430933038393657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,128,0,1,float16,float16,0,0.10758933424949646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,128,0,1,float16,fp8,0,0.1067039966583252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,12,4,128,0,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,128,0,1,float16,float16,0,0.0677706648906072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,128,0,1,float16,fp8,0,0.06631466746330261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,12,128,0,1,fp8,fp8,0,0.06966933111349742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,128,0,1,float16,float16,0,0.05991466840108236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,128,0,1,float16,fp8,0,0.059989333152770996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,1,128,0,1,fp8,fp8,0,0.05385066568851471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,128,0,1,float16,float16,0,0.06035733222961426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,128,0,1,float16,fp8,0,0.06043733159701029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,2,128,0,1,fp8,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,128,0,1,float16,float16,0,0.062208001812299095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,128,0,1,float16,fp8,0,0.060421332716941833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,12,4,128,0,1,fp8,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,128,0,1,float16,float16,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,128,0,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,128,0,1,float16,fp8,0,0.03772266705830892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,128,0,1,float16,float16,0,0.037674665451049805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,12,128,0,1,fp8,fp8,0,0.03850133220354716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,1,128,0,1,fp8,fp8,0,0.0356480007370313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,128,0,1,float16,float16,0,0.03774933268626531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,128,0,1,float16,fp8,0,0.03754133234421412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,128,0,1,float16,fp8,0,0.03765333443880081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,2,128,0,1,fp8,fp8,0,0.03515733281771342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,128,0,1,float16,float16,0,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,12,4,128,0,1,fp8,fp8,0,0.035232000052928925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,128,0,1,float16,float16,0,0.0295413335164388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,128,0,1,float16,fp8,0,0.029872000217437744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,12,128,0,1,fp8,fp8,0,0.027717334528764088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,128,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,128,0,1,float16,fp8,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,1,128,0,1,fp8,fp8,0,0.026837334036827087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,128,0,1,float16,float16,0,0.027290667096773785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,128,0,1,float16,fp8,0,0.027632000545660656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,2,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,128,0,1,float16,float16,0,0.02740799884001414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,128,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,12,4,128,0,1,fp8,fp8,0,0.025274666647116344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,128,0,1,float16,float16,0,0.02293866624434789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,12,128,0,1,fp8,fp8,0,0.021349333226680756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,128,0,1,float16,float16,0,0.01977066695690155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,128,0,1,float16,fp8,0,0.02124800036350886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,1,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,128,0,1,float16,float16,0,0.020992000897725422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,2,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,128,0,1,float16,fp8,0,0.021530665457248688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,12,4,128,0,1,fp8,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,128,0,1,float16,fp8,0,0.019071999937295914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,128,0,1,float16,float16,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,12,128,0,1,fp8,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,1,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,128,0,1,float16,float16,0,0.01904533306757609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,128,0,1,float16,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,2,128,0,1,fp8,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,128,0,1,float16,float16,0,0.01876266673207283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,128,0,1,float16,fp8,0,0.018917333334684372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,128,0,1,float16,float16,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,12,4,128,0,1,fp8,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,128,0,1,float16,fp8,0,0.01757866640885671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,12,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,128,0,1,float16,float16,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,128,0,1,float16,fp8,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,1,128,0,1,fp8,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,128,0,1,float16,float16,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,128,0,1,float16,fp8,0,0.017423999806245167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,2,128,0,1,fp8,fp8,0,0.016143999993801117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,128,0,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,12,4,128,0,1,fp8,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,128,0,1,float16,float16,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,128,0,1,float16,fp8,0,0.019013332823912304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,12,128,0,1,fp8,fp8,0,0.01727466657757759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,1,128,0,1,fp8,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,128,0,1,float16,fp8,0,0.01820266619324684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,2,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,128,0,1,float16,float16,0,0.017370666066805523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,12,4,128,0,1,float16,fp8,0,0.0183999997874101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,128,0,1,float16,float16,0,0.12717333436012268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,128,0,1,float16,fp8,0,0.12596799929936728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,1,128,0,1,fp8,fp8,0,0.11868266264597575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,128,0,1,float16,float16,0,0.12847999731699625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,128,0,1,float16,fp8,0,0.12839466333389282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,2,128,0,1,fp8,fp8,0,0.1244533360004425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,128,0,1,float16,float16,0,0.13009066383043924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,128,0,1,float16,fp8,0,0.1295093297958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,12,4,128,0,1,fp8,fp8,0,0.12771733601888022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,128,0,1,float16,fp8,0,0.0767680009206136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,128,0,1,float16,float16,0,0.07841066519419353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,12,128,0,1,fp8,fp8,0,0.07948266466458638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,128,0,1,float16,float16,0,0.07041066884994507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,128,0,1,float16,fp8,0,0.07115733126799266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,1,128,0,1,fp8,fp8,0,0.06317866841952006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,128,0,1,float16,float16,0,0.07067733506361644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,128,0,1,float16,fp8,0,0.070933332045873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,2,128,0,1,fp8,fp8,0,0.06433600187301636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,128,0,1,float16,float16,0,0.07117866476376851
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,128,0,1,float16,fp8,0,0.07229333122571309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,12,4,128,0,1,fp8,fp8,0,0.06702400247255962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,128,0,1,float16,float16,0,0.046181331078211464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,128,0,1,float16,fp8,0,0.04577599962552389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,12,128,0,1,fp8,fp8,0,0.04218133290608724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,128,0,1,float16,float16,0,0.04432533184687296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,128,0,1,float16,fp8,0,0.04354133208592733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,1,128,0,1,fp8,fp8,0,0.04070399949947993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,128,0,1,float16,float16,0,0.043920000394185386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,128,0,1,float16,fp8,0,0.04400533437728882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,2,128,0,1,fp8,fp8,0,0.041082667807737984
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,128,0,1,float16,float16,0,0.044394666949907936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,128,0,1,float16,fp8,0,0.04483200112978617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,12,4,128,0,1,fp8,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,128,0,1,float16,float16,0,0.029669334491093952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,128,0,1,float16,fp8,0,0.031770666440327965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,12,128,0,1,fp8,fp8,0,0.029461334149042766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,128,0,1,float16,float16,0,0.029605334003766377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,128,0,1,fp8,fp8,0,0.027248000105222065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,1,128,0,1,float16,fp8,0,0.0301706666747729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,2,128,0,1,fp8,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,128,0,1,float16,float16,0,0.029765332738558452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,128,0,1,float16,fp8,0,0.029114666084448498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,128,0,1,float16,float16,0,0.023082666099071503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,12,4,128,0,1,fp8,fp8,0,0.02812266598145167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,128,0,1,fp8,fp8,0,0.023472001155217487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,12,128,0,1,float16,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,128,0,1,fp8,fp8,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,1,128,0,1,float16,fp8,0,0.02388266722361247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,128,0,1,float16,float16,0,0.022874665757020313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,128,0,1,float16,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,2,128,0,1,fp8,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,128,0,1,float16,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,128,0,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,12,4,128,0,1,fp8,fp8,0,0.023221333821614582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,128,0,1,float16,float16,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,128,0,1,float16,fp8,0,0.019280000279347103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,12,128,0,1,fp8,fp8,0,0.017456000049908955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,128,0,1,float16,float16,0,0.017407999684413273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,128,0,1,float16,float16,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,1,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,128,0,1,float16,fp8,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,2,128,0,1,fp8,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,128,0,1,float16,fp8,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,12,4,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,128,0,1,float16,float16,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,12,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,128,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,1,128,0,1,float16,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,2,128,0,1,fp8,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,128,0,1,float16,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,12,4,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,128,0,1,float16,fp8,0,0.01717866708834966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,12,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,1,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,2,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,128,0,1,float16,float16,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,12,4,128,0,1,fp8,fp8,0,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,128,0,1,float16,float16,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,128,0,1,float16,fp8,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,128,0,1,float16,float16,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,12,128,0,1,fp8,fp8,0,0.016800000021855038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,128,0,1,float16,fp8,0,0.016666666915019352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,1,128,0,1,fp8,fp8,0,0.016058667252461117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,128,0,1,float16,float16,0,0.016757333030303318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,2,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,12,4,128,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,128,0,1,float16,float16,0,0.09297600388526917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,128,0,1,float16,fp8,0,0.09326400359471639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,1,128,0,1,fp8,fp8,0,0.08502399921417236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,128,0,1,float16,float16,0,0.09309333562850952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,128,0,1,float16,fp8,0,0.09331732988357544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,2,128,0,1,fp8,fp8,0,0.08616532882054646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,128,0,1,float16,float16,0,0.09363733728726704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,128,0,1,float16,fp8,0,0.0951039989789327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,12,4,128,0,1,fp8,fp8,0,0.09085333347320557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,128,0,1,float16,float16,0,0.055829331278800964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,128,0,1,float16,fp8,0,0.05706666906674703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,12,128,0,1,fp8,fp8,0,0.05403733253479004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,128,0,1,float16,float16,0,0.05513600011666616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,128,0,1,float16,fp8,0,0.05410666763782501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,1,128,0,1,fp8,fp8,0,0.05173333485921224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,128,0,1,float16,float16,0,0.05554133156935374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,128,0,1,float16,fp8,0,0.05422399938106537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,2,128,0,1,fp8,fp8,0,0.052095999320348106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,128,0,1,float16,float16,0,0.05570666491985321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,128,0,1,float16,fp8,0,0.054234668612480164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,128,0,1,float16,float16,0,0.037402667105197906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,12,4,128,0,1,fp8,fp8,0,0.05028266708056132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,128,0,1,float16,fp8,0,0.036490666369597115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,12,128,0,1,fp8,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,128,0,1,float16,float16,0,0.035605333745479584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,128,0,1,float16,fp8,0,0.035674666364987694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,1,128,0,1,fp8,fp8,0,0.03362133353948593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,128,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,128,0,1,fp8,fp8,0,0.03485333422819773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,128,0,1,float16,float16,0,0.035642666121323906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,128,0,1,float16,fp8,0,0.03562133262554804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,2,128,0,1,float16,fp8,0,0.03638399889071783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,128,0,1,float16,float16,0,0.025386666258176167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,12,4,128,0,1,fp8,fp8,0,0.03342933456103007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,128,0,1,float16,fp8,0,0.025114665428797405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,128,0,1,float16,float16,0,0.02534399926662445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,12,128,0,1,fp8,fp8,0,0.025888000925381977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,128,0,1,float16,fp8,0,0.025045332809289295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,1,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,128,0,1,float16,float16,0,0.025493333737055462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,128,0,1,fp8,fp8,0,0.025450666745503742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,2,128,0,1,float16,fp8,0,0.025802666942278545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,128,0,1,float16,float16,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,128,0,1,float16,fp8,0,0.02533866713444392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,12,4,128,0,1,fp8,fp8,0,0.02566933383544286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,128,0,1,float16,float16,0,0.020506666352351505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,12,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,128,0,1,float16,float16,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,128,0,1,float16,fp8,0,0.020394666741291683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,128,0,1,float16,float16,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,1,128,0,1,fp8,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,128,0,1,float16,fp8,0,0.01988799994190534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,2,128,0,1,fp8,fp8,0,0.01884799947341283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,128,0,1,float16,float16,0,0.019914666811625164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,128,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,12,4,128,0,1,fp8,fp8,0,0.01951466624935468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,128,0,1,float16,float16,0,0.015882667154073715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,12,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,1,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,128,0,1,float16,float16,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,128,0,1,float16,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,2,128,0,1,fp8,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,12,4,128,0,1,fp8,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,128,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,12,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,128,0,1,float16,float16,0,0.016261332978804905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,1,128,0,1,fp8,fp8,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,128,0,1,float16,float16,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,2,128,0,1,fp8,fp8,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,128,0,1,float16,float16,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,12,4,128,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,128,0,1,float16,float16,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,12,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,1,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,2,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,128,0,1,float16,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,12,4,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,128,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,12,128,0,1,fp8,fp8,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,128,0,1,float16,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,128,0,1,float16,float16,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,1,128,0,1,fp8,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,2,128,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,128,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,12,4,128,0,1,fp8,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,128,0,1,float16,float16,0,0.0790719985961914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,128,0,1,float16,fp8,0,0.07702933251857758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,1,128,0,1,fp8,fp8,0,0.07467199862003326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,128,0,1,float16,float16,0,0.07854933540026347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,128,0,1,float16,fp8,0,0.07753066718578339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,2,128,0,1,fp8,fp8,0,0.07474133372306824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,128,0,1,float16,float16,0,0.07891199986139934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,128,0,1,float16,fp8,0,0.0787306676308314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,12,4,128,0,1,fp8,fp8,0,0.07483733197053273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,128,0,1,float16,float16,0,0.04903466502825419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,128,0,1,float16,fp8,0,0.04811733464399973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,12,128,0,1,fp8,fp8,0,0.048170665899912514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,128,0,1,float16,float16,0,0.04763199885686239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,128,0,1,float16,fp8,0,0.04775466521581014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,1,128,0,1,fp8,fp8,0,0.04578666885693868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,128,0,1,float16,float16,0,0.04827199876308441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,128,0,1,float16,fp8,0,0.04819199939568838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,2,128,0,1,fp8,fp8,0,0.04571733375390371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,128,0,1,float16,float16,0,0.047983999053637184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,128,0,1,float16,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,12,4,128,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,128,0,1,float16,float16,0,0.03194666653871536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,128,0,1,float16,fp8,0,0.031685332457224526
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,12,128,0,1,fp8,fp8,0,0.03161066770553589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,128,0,1,float16,float16,0,0.03180266668399175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,128,0,1,float16,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,1,128,0,1,fp8,fp8,0,0.02951466788848241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,128,0,1,float16,float16,0,0.031370667119820915
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,128,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,2,128,0,1,fp8,fp8,0,0.029690665503342945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,128,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,128,0,1,float16,fp8,0,0.031557333966096245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,12,4,128,0,1,fp8,fp8,0,0.031343999008337654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,128,0,1,float16,fp8,0,0.02333866556485494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,128,0,1,float16,float16,0,0.021125334004561108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,12,128,0,1,fp8,fp8,0,0.022853332261244457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,128,0,1,float16,fp8,0,0.02317333221435547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,1,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,128,0,1,float16,float16,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,2,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,128,0,1,float16,float16,0,0.021727999051411945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,128,0,1,float16,fp8,0,0.023077333966890972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,128,0,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,12,4,128,0,1,fp8,fp8,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,128,0,1,float16,fp8,0,0.018954666952292126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,12,128,0,1,fp8,fp8,0,0.01913600042462349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,1,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,128,0,1,float16,float16,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,128,0,1,float16,fp8,0,0.018911999960740406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,2,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,128,0,1,float16,float16,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,128,0,1,float16,fp8,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,12,4,128,0,1,fp8,fp8,0,0.018079999834299088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,12,128,0,1,fp8,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,128,0,1,float16,float16,0,0.01648533344268799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,1,128,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,128,0,1,float16,float16,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,2,128,0,1,fp8,fp8,0,0.017551999539136887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,128,0,1,float16,fp8,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,12,4,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,128,0,1,float16,fp8,0,0.014954666296641031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,12,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,1,128,0,1,fp8,fp8,0,0.01676799977819125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,128,0,1,float16,fp8,0,0.015925332903862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,2,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,128,0,1,float16,float16,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,128,0,1,float16,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,12,4,128,0,1,fp8,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,128,0,1,float16,float16,0,0.015599999576807022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,12,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,128,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,1,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,2,128,0,1,fp8,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,128,0,1,float16,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,12,4,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,12,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,1,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,128,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,2,128,0,1,float16,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,128,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,128,0,1,float16,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,12,4,128,0,1,fp8,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,128,0,1,float16,float16,0,0.0682826687892278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,128,0,1,float16,fp8,0,0.06841599941253662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,1,128,0,1,fp8,fp8,0,0.06182933350404104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,128,0,1,float16,float16,0,0.06820266445477803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,128,0,1,float16,fp8,0,0.06811200082302094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,2,128,0,1,fp8,fp8,0,0.06181866427262624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,128,0,1,float16,float16,0,0.0682773341735204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,128,0,1,float16,fp8,0,0.06782400111357371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,12,4,128,0,1,fp8,fp8,0,0.0624533345301946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,0,0.04208533465862274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,12,128,0,1,fp8,fp8,0,0.03822399924198786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,128,0,1,float16,float16,0,0.04181866844495138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,1,128,0,1,fp8,fp8,0,0.03818133225043615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,128,0,1,float16,float16,0,0.041690667470296226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,128,0,1,float16,fp8,0,0.041637333730856575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,2,128,0,1,fp8,fp8,0,0.037962667644023895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,128,0,1,float16,float16,0,0.04153066625197729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,12,4,128,0,1,fp8,fp8,0,0.03749866783618927
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,0,0.027466667195161183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,0,0.027402666707833607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,12,128,0,1,fp8,fp8,0,0.026496000587940216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,128,0,1,float16,float16,0,0.027855999767780304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,128,0,1,float16,fp8,0,0.027461332579453785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,1,128,0,1,fp8,fp8,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,128,0,1,float16,float16,0,0.027514666318893433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,128,0,1,float16,fp8,0,0.027808000644048054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,2,128,0,1,fp8,fp8,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,128,0,1,float16,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,128,0,1,fp8,fp8,0,0.026650667190551758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,12,4,128,0,1,float16,float16,0,0.029877332349618275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,0,0.022848000129063923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,0,0.022976001103719074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,12,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,128,0,1,float16,float16,0,0.023290666441122692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,1,128,0,1,float16,fp8,0,0.02386666586001714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,128,0,1,float16,float16,0,0.023423999547958374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,128,0,1,float16,fp8,0,0.0229066660006841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,2,128,0,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,128,0,1,float16,float16,0,0.023120000958442688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,128,0,1,float16,fp8,0,0.025610665480295818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,12,4,128,0,1,fp8,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,0,0.020031999796628952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,12,128,0,1,fp8,fp8,0,0.01929066702723503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,128,0,1,float16,float16,0,0.019088000059127808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,1,128,0,1,float16,fp8,0,0.020154666155576706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,128,0,1,float16,float16,0,0.01922133316596349
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,2,128,0,1,fp8,fp8,0,0.019173332800467808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,128,0,1,float16,float16,0,0.02037866661945979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,128,0,1,float16,fp8,0,0.02094399929046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,0,0.01600533351302147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,12,4,128,0,1,fp8,fp8,0,0.01941866676012675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,12,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,128,0,1,float16,float16,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,2,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,128,0,1,float16,fp8,0,0.017866666118303936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,12,4,128,0,1,fp8,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,0,0.01730666682124138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,12,128,0,1,fp8,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,1,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,128,0,1,float16,float16,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,128,0,1,float16,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,2,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,128,0,1,float16,float16,0,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,128,0,1,float16,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,12,4,128,0,1,fp8,fp8,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,12,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,128,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,1,128,0,1,fp8,fp8,0,0.014917333920796713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,2,128,0,1,fp8,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,128,0,1,float16,fp8,0,0.016837333639462788
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,12,4,128,0,1,fp8,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,12,128,0,1,fp8,fp8,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,128,0,1,float16,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,1,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,128,0,1,float16,float16,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,128,0,1,float16,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,128,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,12,4,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,128,0,1,fp8,fp8,0,2.265669345855713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,float16,0,2.745018641153971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,1,128,0,1,float16,fp8,0,2.733407974243164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,128,0,1,fp8,fp8,0,2.275514602661133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,float16,0,2.7805067698160806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,2,128,0,1,float16,fp8,0,2.8349599838256836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,float16,0,2.894634564717611
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,float16,0,1.4844959576924641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,128,0,1,float16,fp8,0,1.581125259399414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,128,0,1,fp8,fp8,0,2.3089812596639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,8,4,128,0,1,float16,fp8,0,2.814202626546224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,8,128,0,1,fp8,fp8,0,1.2333920001983643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,float16,0,1.4636054039001465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,128,0,1,float16,fp8,0,1.4372266133626301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,1,128,0,1,fp8,fp8,0,1.2332586447397869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,float16,0,1.4368906021118164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,128,0,1,fp8,fp8,0,1.2044106324513753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,2,128,0,1,float16,fp8,0,1.4657440185546875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,float16,0,1.4377172787984211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,float16,0,0.8324426809946696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,128,0,1,float16,fp8,0,1.461839993794759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,8,4,128,0,1,fp8,fp8,0,1.2132586638132732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,128,0,1,float16,fp8,0,0.8204800287882487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,8,128,0,1,fp8,fp8,0,0.7146399815877279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,float16,0,0.771392027537028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,128,0,1,float16,fp8,0,0.7776052951812744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,1,128,0,1,fp8,fp8,0,0.666048010190328
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,float16,0,0.7924213409423828
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,128,0,1,float16,fp8,0,0.7917760213216146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,2,128,0,1,fp8,fp8,0,0.6710720062255859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,float16,0,0.7880160013834635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,float16,0,0.48232531547546387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,128,0,1,float16,fp8,0,0.7993386586507162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,8,4,128,0,1,fp8,fp8,0,0.6770079930623373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,128,0,1,float16,fp8,0,0.49427199363708496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,8,128,0,1,fp8,fp8,0,0.4150666793187459
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,fp8,0,0.4514133135477702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,128,0,1,float16,float16,0,0.4484746853510539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,1,128,0,1,fp8,fp8,0,0.41869866847991943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,float16,0,0.4522240161895752
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,128,0,1,fp8,fp8,0,0.40011199315388996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,2,128,0,1,float16,fp8,0,0.45163734753926593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,128,0,1,fp8,fp8,0,0.4058613379796346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,float16,0,0.46294931570688885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,8,4,128,0,1,float16,fp8,0,0.4572480122248332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,128,0,1,fp8,fp8,0,1.3805599212646484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,float16,0,1.6445172627766926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,1,128,0,1,float16,fp8,0,1.6770666440327961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,float16,0,1.6505279541015625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,128,0,1,fp8,fp8,0,1.476688067118327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,2,128,0,1,float16,fp8,0,1.6575253804524739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,128,0,1,fp8,fp8,0,1.4096105893452961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,float16,0,1.7268320719401042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,8,4,128,0,1,float16,fp8,0,1.6892479260762532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,128,0,1,fp8,fp8,0,0.7814079920450846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,fp8,0,0.9301013151804606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,float16,0,0.8743039766947428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,8,128,0,1,float16,float16,0,0.9251626332600912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,128,0,1,fp8,fp8,0,0.7450133164723715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,1,128,0,1,float16,fp8,0,0.860485315322876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,float16,0,0.8849706649780273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,128,0,1,float16,fp8,0,0.8889386653900146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,2,128,0,1,fp8,fp8,0,0.7466239929199219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,float16,0,0.8853813012441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,128,0,1,fp8,fp8,0,0.7543946901957194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,8,4,128,0,1,float16,fp8,0,0.8982826868693033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,float16,0,0.5181279977162679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,128,0,1,fp8,fp8,0,0.44467735290527344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,8,128,0,1,float16,fp8,0,0.5214346647262573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,float16,0,0.48347198963165283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,128,0,1,float16,fp8,0,0.4847946564356486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,1,128,0,1,fp8,fp8,0,0.4251946608225505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,float16,0,0.4850720167160034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,128,0,1,float16,fp8,0,0.4936213493347168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,2,128,0,1,fp8,fp8,0,0.4275039831797282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,float16,0,0.5007893244425455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,128,0,1,float16,fp8,0,0.5005120038986206
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,8,4,128,0,1,fp8,fp8,0,0.43140800793965656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,float16,0,0.3168586691220601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,128,0,1,float16,fp8,0,0.3179093400637309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,8,128,0,1,fp8,fp8,0,0.2755146622657776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,float16,0,0.2895359992980957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,128,0,1,float16,fp8,0,0.2932426730791728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,1,128,0,1,fp8,fp8,0,0.26071999470392865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,float16,0,0.2900320092837016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,128,0,1,float16,fp8,0,0.29124800364176434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,2,128,0,1,fp8,fp8,0,0.25941866636276245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,float16,0,0.29543999830881756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,128,0,1,float16,fp8,0,0.29258133967717487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,8,4,128,0,1,fp8,fp8,0,0.26522666215896606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,float16,0,1.2009653250376384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,128,0,1,fp8,fp8,0,1.017632007598877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,1,128,0,1,float16,fp8,0,1.2114933331807454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,float16,0,1.2148959636688232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,128,0,1,fp8,fp8,0,1.0272160371144612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,2,128,0,1,float16,fp8,0,1.2251466910044353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,float16,0,1.2540427049001057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,128,0,1,float16,fp8,0,1.2571626504262288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,float16,0,0.6953120231628418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,128,0,1,float16,fp8,0,0.711184024810791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,8,4,128,0,1,fp8,fp8,0,1.0446453094482422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,8,128,0,1,fp8,fp8,0,0.5997973283131918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,float16,0,0.6422666708628336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,128,0,1,float16,fp8,0,0.6461120049158732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,1,128,0,1,fp8,fp8,0,0.5552639961242676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,float16,0,0.654581348101298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,128,0,1,float16,fp8,0,0.6575626532236735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,2,128,0,1,fp8,fp8,0,0.5590560038884481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,float16,0,0.6611146529515585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,128,0,1,float16,fp8,0,0.6714239915211996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,float16,0,0.39740800857543945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,8,4,128,0,1,fp8,fp8,0,0.5670666694641113
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,128,0,1,float16,fp8,0,0.39985068639119464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,8,128,0,1,fp8,fp8,0,0.3391679922739665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,float16,0,0.3600693146387736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,128,0,1,float16,fp8,0,0.35939733187357586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,1,128,0,1,fp8,fp8,0,0.32028265794118244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,float16,0,0.3649066686630249
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,128,0,1,float16,fp8,0,0.3661919832229614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,2,128,0,1,fp8,fp8,0,0.3237706621487935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,float16,0,0.37534932295481366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,128,0,1,float16,fp8,0,0.37855998675028485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,8,4,128,0,1,fp8,fp8,0,0.32968000570933026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,float16,0,0.23831466833750406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,128,0,1,float16,fp8,0,0.23908267418543497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,8,128,0,1,fp8,fp8,0,0.21584532658259073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,float16,0,0.22819199164708456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,128,0,1,float16,fp8,0,0.2298346757888794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,1,128,0,1,fp8,fp8,0,0.2059040069580078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,float16,0,0.23042132457097372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,128,0,1,float16,fp8,0,0.22855999072392783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,2,128,0,1,fp8,fp8,0,0.2071946660677592
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,float16,0,0.2283359964688619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,128,0,1,float16,fp8,0,0.2309173345565796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,8,4,128,0,1,fp8,fp8,0,0.20867733160654703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,float16,0,1.5960586865743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,128,0,1,fp8,fp8,0,1.3513973553975422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,1,128,0,1,float16,fp8,0,1.5949333508809407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,float16,0,1.615898609161377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,128,0,1,fp8,fp8,0,1.4349385897318523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,2,128,0,1,float16,fp8,0,1.6174933115641277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,float16,0,1.661893367767334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,float16,0,0.9147253036499023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,128,0,1,fp8,fp8,0,1.4134292602539062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,8,4,128,0,1,float16,fp8,0,1.6438080469767253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,128,0,1,float16,fp8,0,0.9111626942952474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,8,128,0,1,fp8,fp8,0,0.7606346607208252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,float16,0,0.8547733624776205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,128,0,1,fp8,fp8,0,0.710261344909668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,1,128,0,1,float16,fp8,0,0.8328213691711426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,float16,0,0.8340053558349609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,128,0,1,fp8,fp8,0,0.7186240355173746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,2,128,0,1,float16,fp8,0,0.8445706367492676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,float16,0,0.8528266747792562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,float16,0,0.48982401688893634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,128,0,1,fp8,fp8,0,0.728325366973877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,8,4,128,0,1,float16,fp8,0,0.8672053019205729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,128,0,1,float16,fp8,0,0.4984426498413086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,8,128,0,1,fp8,fp8,0,0.41818666458129883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,float16,0,0.44582398732503253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,128,0,1,float16,fp8,0,0.451472004254659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,1,128,0,1,fp8,fp8,0,0.3943146864573161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,float16,0,0.45278934637705487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,128,0,1,fp8,fp8,0,0.3981279929478963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,2,128,0,1,float16,fp8,0,0.45741868019104004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,float16,0,0.46810134251912433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,float16,0,0.2877546747525533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,128,0,1,fp8,fp8,0,0.40485334396362305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,8,4,128,0,1,float16,fp8,0,0.46671466032663983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,128,0,1,float16,fp8,0,0.2890080014864604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,8,128,0,1,fp8,fp8,0,0.24878400564193726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,float16,0,0.2557706634203593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,128,0,1,float16,fp8,0,0.25464532772699994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,1,128,0,1,fp8,fp8,0,0.2295680046081543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,float16,0,0.2582346598307292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,128,0,1,float16,fp8,0,0.2590986688931783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,2,128,0,1,fp8,fp8,0,0.23435733715693155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,float16,0,0.2696320017178853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,128,0,1,fp8,fp8,0,0.2399946649869283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,float16,0,0.1730239987373352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,8,4,128,0,1,float16,fp8,0,0.26845866441726685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,128,0,1,float16,fp8,0,0.17328532536824545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,8,128,0,1,fp8,fp8,0,0.1601653297742208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,float16,0,0.16675732533137003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,128,0,1,float16,fp8,0,0.1682986617088318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,1,128,0,1,fp8,fp8,0,0.15330132842063904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,float16,0,0.1701386570930481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,128,0,1,float16,fp8,0,0.16810667514801025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,2,128,0,1,fp8,fp8,0,0.15480533242225647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,float16,0,0.1690773367881775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,128,0,1,float16,fp8,0,0.17189866304397583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,8,4,128,0,1,fp8,fp8,0,0.15562132994333902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,float16,0,0.9941866397857666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,128,0,1,fp8,fp8,0,0.854581356048584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,1,128,0,1,float16,fp8,0,1.000810702641805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,float16,0,1.0030559698740642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,128,0,1,float16,fp8,0,1.0156319936116536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,2,128,0,1,fp8,fp8,0,0.8678026994069418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,float16,0,1.0318613052368164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,128,0,1,float16,fp8,0,1.033392031987508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,float16,0,0.5809866587320963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,128,0,1,float16,fp8,0,0.5921066602071127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,8,128,0,1,fp8,fp8,0,0.5006879965464274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,8,4,128,0,1,fp8,fp8,0,0.9017972946166992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,float16,0,0.5242826541264852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,128,0,1,fp8,fp8,0,0.4556906620661418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,1,128,0,1,float16,fp8,0,0.525322675704956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,float16,0,0.531493345896403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,128,0,1,float16,fp8,0,0.5362720092137655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,2,128,0,1,fp8,fp8,0,0.4621066649754842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,float16,0,0.5449546575546265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,128,0,1,float16,fp8,0,0.5513866742451986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,8,4,128,0,1,fp8,fp8,0,0.4726453224817912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,float16,0,0.32201067606608075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,128,0,1,float16,fp8,0,0.32714666922887164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,8,128,0,1,fp8,fp8,0,0.27853866418202716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,float16,0,0.2841866612434387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,128,0,1,float16,fp8,0,0.28463466962178546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,1,128,0,1,fp8,fp8,0,0.2584373354911804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,float16,0,0.2940746744473775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,128,0,1,float16,fp8,0,0.29390400648117065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,2,128,0,1,fp8,fp8,0,0.26145599285761517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,float16,0,0.30274667342503864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,128,0,1,float16,fp8,0,0.306442658106486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,float16,0,0.18996800978978476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,8,4,128,0,1,fp8,fp8,0,0.26606933275858563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,128,0,1,float16,fp8,0,0.19058134158452353
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,8,128,0,1,fp8,fp8,0,0.16890132427215576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,float16,0,0.16689600547154745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,128,0,1,float16,fp8,0,0.16988267501195273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,1,128,0,1,fp8,fp8,0,0.15263467033704123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,float16,0,0.16970133781433105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,128,0,1,float16,fp8,0,0.17020267248153687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,2,128,0,1,fp8,fp8,0,0.15595733126004538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,float16,0,0.17306133111317953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,128,0,1,float16,fp8,0,0.17557867368062338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,float16,0,0.11760000387827556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,8,4,128,0,1,fp8,fp8,0,0.159770667552948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,128,0,1,float16,fp8,0,0.11777066191037495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,8,128,0,1,fp8,fp8,0,0.10941333572069804
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,float16,0,0.11495467027028401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,128,0,1,float16,fp8,0,0.11574932932853699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,1,128,0,1,fp8,fp8,0,0.1072746713956197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,float16,0,0.11575999855995178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,128,0,1,float16,fp8,0,0.1167680025100708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,2,128,0,1,fp8,fp8,0,0.10700800021489461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,float16,0,0.11636799573898315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,128,0,1,float16,fp8,0,0.11774933338165283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,8,4,128,0,1,fp8,fp8,0,0.10874133308728536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,float16,0,1.0182240009307861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,128,0,1,float16,fp8,0,1.032970666885376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,1,128,0,1,fp8,fp8,0,0.8974080085754395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,128,0,1,fp8,fp8,0,0.9144799709320068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,float16,0,1.0406400362650554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,2,128,0,1,float16,fp8,0,1.0478400389353435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,128,0,1,fp8,fp8,0,0.9338826338450114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,float16,0,1.0772106647491455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,8,4,128,0,1,float16,fp8,0,1.078330675760905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,128,0,1,fp8,fp8,0,0.5184266567230225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,float16,0,0.5985600153605143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,8,128,0,1,float16,fp8,0,0.6065013408660889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,128,0,1,fp8,fp8,0,0.4656533400217692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,float16,0,0.5271893342336019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,1,128,0,1,float16,fp8,0,0.5330613454182943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,float16,0,0.5400319894154867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,128,0,1,fp8,fp8,0,0.4734453360239665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,2,128,0,1,float16,fp8,0,0.5403786500295004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,float16,0,0.32281599442164105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,128,0,1,fp8,fp8,0,0.4859093427658081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,fp8,0,0.5663253466288248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,8,4,128,0,1,float16,float16,0,0.5588373343149821
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,128,0,1,float16,fp8,0,0.32710933685302734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,8,128,0,1,fp8,fp8,0,0.28143999973932904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,float16,0,0.2842560013135274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,128,0,1,float16,fp8,0,0.2855786681175232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,1,128,0,1,fp8,fp8,0,0.25496000051498413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,float16,0,0.2895253300666809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,128,0,1,float16,fp8,0,0.29292800029118854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,2,128,0,1,fp8,fp8,0,0.2613439957300822
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,float16,0,0.3009226719538371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,128,0,1,fp8,fp8,0,0.26707732677459717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,8,4,128,0,1,float16,fp8,0,0.3048853278160095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,float16,0,0.18580265839894614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,128,0,1,float16,fp8,0,0.18700265884399414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,8,128,0,1,fp8,fp8,0,0.16396266222000122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,float16,0,0.1586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,128,0,1,float16,fp8,0,0.15889066457748413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,1,128,0,1,fp8,fp8,0,0.14403200149536133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,float16,0,0.160970667997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,128,0,1,float16,fp8,0,0.16269866625467935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,2,128,0,1,fp8,fp8,0,0.1502240002155304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,float16,0,0.17005334297815958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,128,0,1,float16,fp8,0,0.17086400588353476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,8,4,128,0,1,fp8,fp8,0,0.15494400262832642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,float16,0,0.10765332976977031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,128,0,1,float16,fp8,0,0.10976533095041911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,8,128,0,1,fp8,fp8,0,0.10180800159772237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,float16,0,0.10333333412806193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,128,0,1,fp8,fp8,0,0.09382399916648865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,1,128,0,1,float16,fp8,0,0.10537599523862202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,float16,0,0.10551466544469197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,128,0,1,float16,fp8,0,0.10439999898274739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,2,128,0,1,fp8,fp8,0,0.09523199995358785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,fp8,0,0.10743467013041179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,128,0,1,float16,float16,0,0.10577600200970967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,8,4,128,0,1,fp8,fp8,0,0.0972213347752889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,float16,0,0.07253333429495494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,128,0,1,float16,fp8,0,0.07311466832955678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,8,128,0,1,fp8,fp8,0,0.06861333549022675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,float16,0,0.07046400010585785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,128,0,1,fp8,fp8,0,0.06625600159168243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,1,128,0,1,float16,fp8,0,0.07044800122578938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,float16,0,0.07026666899522145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,128,0,1,float16,fp8,0,0.07233599821726482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,2,128,0,1,fp8,fp8,0,0.06823466718196869
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,float16,0,0.07264000177383423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,128,0,1,float16,fp8,0,0.07252266506354015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,8,4,128,0,1,fp8,fp8,0,0.06833600004514058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,float16,0,0.671066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,128,0,1,fp8,fp8,0,0.5938186645507812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,1,128,0,1,float16,fp8,0,0.6707359949747721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,float16,0,0.6868693033854166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,128,0,1,fp8,fp8,0,0.606277346611023
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,2,128,0,1,float16,fp8,0,0.6908853054046631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,float16,0,0.7061813672383627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,128,0,1,float16,fp8,0,0.7145973046620687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,float16,0,0.39973334471384686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,128,0,1,float16,fp8,0,0.4034506479899089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,8,128,0,1,fp8,fp8,0,0.3499679962793986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,8,4,128,0,1,fp8,fp8,0,0.623525341351827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,float16,0,0.3500959873199463
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,128,0,1,float16,fp8,0,0.34969600041707355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,1,128,0,1,fp8,fp8,0,0.31250133117039997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,float16,0,0.36137068271636963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,128,0,1,float16,fp8,0,0.3614879846572876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,2,128,0,1,fp8,fp8,0,0.3181599974632263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,float16,0,0.37269333998362225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,128,0,1,float16,fp8,0,0.3760106563568115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,8,4,128,0,1,fp8,fp8,0,0.32868266105651855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,float16,0,0.2203893264134725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,128,0,1,float16,fp8,0,0.22459199031194052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,8,128,0,1,fp8,fp8,0,0.19612799088160196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,float16,0,0.18784532944361368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,128,0,1,float16,fp8,0,0.18964266777038574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,1,128,0,1,fp8,fp8,0,0.17484267552693686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,float16,0,0.1934666633605957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,128,0,1,float16,fp8,0,0.19436800479888916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,2,128,0,1,fp8,fp8,0,0.17730132738749185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,float16,0,0.2047040065129598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,128,0,1,float16,fp8,0,0.20634132623672485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,8,4,128,0,1,fp8,fp8,0,0.18366400400797525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,float16,0,0.12602667013804117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,128,0,1,fp8,fp8,0,0.11545067032178243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,8,128,0,1,float16,fp8,0,0.12800000111262003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,float16,0,0.10971200466156006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,128,0,1,float16,fp8,0,0.11050666371981303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,1,128,0,1,fp8,fp8,0,0.09950400392214458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,float16,0,0.11097600062688191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,128,0,1,float16,fp8,0,0.11194133758544922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,float16,0,0.1139306624730428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,2,128,0,1,fp8,fp8,0,0.10171199838320415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,128,0,1,float16,fp8,0,0.11424000064531963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,float16,0,0.07670400043328603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,8,4,128,0,1,fp8,fp8,0,0.10687999924023946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,128,0,1,float16,fp8,0,0.07724266747633617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,8,128,0,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,float16,0,0.07351999978224437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,128,0,1,float16,fp8,0,0.07627733548482259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,1,128,0,1,fp8,fp8,0,0.06734399994214375
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,float16,0,0.07500266532103221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,128,0,1,float16,fp8,0,0.075573335091273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,2,128,0,1,fp8,fp8,0,0.06820799907048543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,float16,0,0.0748586654663086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,128,0,1,float16,fp8,0,0.07713599999745686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,8,4,128,0,1,fp8,fp8,0,0.06876266499360402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,float16,0,0.05797333518664042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,128,0,1,float16,fp8,0,0.06010133524735769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,8,128,0,1,fp8,fp8,0,0.05624533196290334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,float16,0,0.05663466453552246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,128,0,1,float16,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,1,128,0,1,fp8,fp8,0,0.054378668467203774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,float16,0,0.056234667698542275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,128,0,1,float16,fp8,0,0.05737066765626272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,2,128,0,1,fp8,fp8,0,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,float16,0,0.05801066756248474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,128,0,1,float16,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,8,4,128,0,1,fp8,fp8,0,0.054287999868392944
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,float16,0,0.7680959701538086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,128,0,1,float16,fp8,0,0.7711199919382731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,1,128,0,1,fp8,fp8,0,0.6767786343892416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,128,0,1,fp8,fp8,0,0.6921066443125407
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,fp8,0,0.7919572989145914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,2,128,0,1,float16,float16,0,0.7868426640828451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,float16,0,0.44494398434956867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,128,0,1,fp8,fp8,0,0.7091466585795084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,float16,0,0.800335963567098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,8,4,128,0,1,float16,fp8,0,0.8097973664601644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,128,0,1,float16,fp8,0,0.44995733102162677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,8,128,0,1,fp8,fp8,0,0.39528000354766846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,fp8,0,0.38917334874471027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,128,0,1,float16,float16,0,0.387173334757487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,1,128,0,1,fp8,fp8,0,0.3480693499247233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,float16,0,0.3994506597518921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,128,0,1,float16,fp8,0,0.40089599291483563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,2,128,0,1,fp8,fp8,0,0.3559253215789795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,float16,0,0.41654932498931885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,128,0,1,float16,fp8,0,0.4182453155517578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,float16,0,0.23856000105539957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,8,4,128,0,1,fp8,fp8,0,0.3680373430252075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,128,0,1,float16,fp8,0,0.24125333627065024
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,8,128,0,1,fp8,fp8,0,0.21273066600163779
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,float16,0,0.20268799861272177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,128,0,1,float16,fp8,0,0.20494933923085532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,1,128,0,1,fp8,fp8,0,0.18881599108378092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,float16,0,0.2126026749610901
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,128,0,1,float16,fp8,0,0.2130240003267924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,2,128,0,1,fp8,fp8,0,0.19173866510391235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,float16,0,0.2230666677157084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,128,0,1,float16,fp8,0,0.22643200556437174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,8,4,128,0,1,fp8,fp8,0,0.1997119983037313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,float16,0,0.13473066687583923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,128,0,1,float16,fp8,0,0.1360106666882833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,8,128,0,1,fp8,fp8,0,0.12169599533081055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,fp8,0,0.11221866806348164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,128,0,1,float16,float16,0,0.11058132847150166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,1,128,0,1,fp8,fp8,0,0.10311466455459595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,float16,0,0.11170132954915364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,128,0,1,float16,fp8,0,0.1141813298066457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,2,128,0,1,fp8,fp8,0,0.10785067081451416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,float16,0,0.12300266822179158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,128,0,1,float16,fp8,0,0.1237546702226003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,8,4,128,0,1,fp8,fp8,0,0.11532266934712727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,float16,0,0.07544533411661784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,128,0,1,float16,fp8,0,0.07805866499741872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,8,128,0,1,fp8,fp8,0,0.07314666608969371
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,float16,0,0.07035733262697856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,128,0,1,float16,fp8,0,0.07131200035413106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,1,128,0,1,fp8,fp8,0,0.06637333333492279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,float16,0,0.07268799841403961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,128,0,1,float16,fp8,0,0.07267733414967854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,2,128,0,1,fp8,fp8,0,0.06666133304437001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,float16,0,0.07246399919191997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,128,0,1,float16,fp8,0,0.07461333274841309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,float16,0,0.05027733246485392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,8,4,128,0,1,fp8,fp8,0,0.06716800232728322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,128,0,1,float16,fp8,0,0.05002133548259735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,8,128,0,1,fp8,fp8,0,0.047872001926104225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,float16,0,0.04769066472848257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,128,0,1,fp8,fp8,0,0.04458666841189066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,1,128,0,1,float16,fp8,0,0.04816000163555145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,float16,0,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,128,0,1,fp8,fp8,0,0.04595733185609182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,2,128,0,1,float16,fp8,0,0.04800533254941305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,float16,0,0.04964800179004669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,128,0,1,float16,fp8,0,0.04965866605440775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,8,4,128,0,1,fp8,fp8,0,0.04563733438650767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,float16,0,0.04365866879622141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,128,0,1,float16,fp8,0,0.04414399961630503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,8,128,0,1,fp8,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,float16,0,0.043706665436426796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,128,0,1,float16,fp8,0,0.04375466704368591
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,1,128,0,1,fp8,fp8,0,0.039994666973749794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,float16,0,0.04381333291530609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,128,0,1,float16,fp8,0,0.04379733403523763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,2,128,0,1,fp8,fp8,0,0.04012266546487808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,float16,0,0.04372799893220266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,128,0,1,float16,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,8,4,128,0,1,fp8,fp8,0,0.04155733436346054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,float16,0,0.5397280057271322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,128,0,1,float16,fp8,0,0.5394506851832072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,1,128,0,1,fp8,fp8,0,0.47802666823069256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,128,0,1,fp8,fp8,0,0.48704532782236737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,fp8,0,0.5516906579335531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,2,128,0,1,float16,float16,0,0.5516479810078939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,128,0,1,fp8,fp8,0,0.49803733825683594
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,fp8,0,0.5650186538696289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,8,4,128,0,1,float16,float16,0,0.5574826796849569
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,128,0,1,fp8,fp8,0,0.2853066722551982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,fp8,0,0.3192480007807414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,8,128,0,1,float16,float16,0,0.3174506624539693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,128,0,1,fp8,fp8,0,0.24823466936747232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,fp8,0,0.27513599395751953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,1,128,0,1,float16,float16,0,0.2720959981282552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,fp8,0,0.28228267033894855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,128,0,1,fp8,fp8,0,0.2539199988047282
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,2,128,0,1,float16,float16,0,0.28123732407887775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,float16,0,0.29122666517893475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,128,0,1,fp8,fp8,0,0.2609226703643799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,8,4,128,0,1,float16,fp8,0,0.29333333174387616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,float16,0,0.17100799083709717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,float16,0,0.14198399583498636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,128,0,1,fp8,fp8,0,0.1562933325767517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,8,128,0,1,float16,fp8,0,0.17494932810465494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,128,0,1,float16,fp8,0,0.14240533113479614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,1,128,0,1,fp8,fp8,0,0.13598933815956116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,float16,0,0.15049599607785544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,128,0,1,float16,fp8,0,0.15159466862678528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,2,128,0,1,fp8,fp8,0,0.13987732927004495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,float16,0,0.15743999679883322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,128,0,1,float16,fp8,0,0.16012799739837646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,float16,0,0.09799999992052714
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,8,4,128,0,1,fp8,fp8,0,0.14481600125630698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,128,0,1,float16,fp8,0,0.09748799602190654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,8,128,0,1,fp8,fp8,0,0.09114666779836018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,float16,0,0.08183999856313069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,128,0,1,float16,fp8,0,0.08260799944400787
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,1,128,0,1,fp8,fp8,0,0.07457600037256877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,float16,0,0.08246933420499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,128,0,1,float16,fp8,0,0.08481066425641377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,2,128,0,1,fp8,fp8,0,0.07530666887760162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,float16,0,0.08493866523106892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,128,0,1,float16,fp8,0,0.08741333087285359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,8,4,128,0,1,fp8,fp8,0,0.08294400076071422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,float16,0,0.05717333157857259
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,128,0,1,fp8,fp8,0,0.05436266462008158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,8,128,0,1,float16,fp8,0,0.05858666698137919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,float16,0,0.054245332876841225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,128,0,1,float16,fp8,0,0.05598400036493937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,1,128,0,1,fp8,fp8,0,0.05035200218359629
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,fp8,0,0.05595199763774872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,128,0,1,float16,float16,0,0.05459733307361603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,2,128,0,1,fp8,fp8,0,0.050016000866889954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,fp8,0,0.05762133498986562
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,128,0,1,float16,float16,0,0.05621333420276642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,8,4,128,0,1,fp8,fp8,0,0.052330667773882546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,float16,0,0.03962666789690653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,128,0,1,float16,fp8,0,0.04170133173465729
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,8,128,0,1,fp8,fp8,0,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,float16,0,0.040336000422636666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,128,0,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,1,128,0,1,fp8,fp8,0,0.03733866661787033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,float16,0,0.03985599925120672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,128,0,1,float16,fp8,0,0.03963200002908707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,2,128,0,1,fp8,fp8,0,0.03754666695992152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,float16,0,0.039706667264302574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,128,0,1,float16,fp8,0,0.03984533250331879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,8,4,128,0,1,fp8,fp8,0,0.03793066740036011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,128,0,1,float16,float16,0,0.035904000202814736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,8,128,0,1,fp8,fp8,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,float16,0,0.03773866593837738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,128,0,1,float16,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,1,128,0,1,fp8,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,float16,0,0.037615999579429626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,128,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,2,128,0,1,fp8,fp8,0,0.03541333228349686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,float16,0,0.0364533339937528
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,128,0,1,float16,fp8,0,0.0359199990828832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,8,4,128,0,1,fp8,fp8,0,0.03568000098069509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,float16,0,0.5465706586837769
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,128,0,1,float16,fp8,0,0.5438826481501261
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,1,128,0,1,fp8,fp8,0,0.5220853487650553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,float16,0,0.5619253317515055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,128,0,1,float16,fp8,0,0.5559200048446655
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,2,128,0,1,fp8,fp8,0,0.5299199819564819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,float16,0,0.5886933406194051
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,128,0,1,float16,fp8,0,0.5818453232447306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,8,4,128,0,1,fp8,fp8,0,0.5589439868927002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,float16,0,0.32837865749994916
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,128,0,1,float16,fp8,0,0.3230026761690776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,8,128,0,1,fp8,fp8,0,0.31539734204610187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,float16,0,0.2847413420677185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,128,0,1,float16,fp8,0,0.2832319935162862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,1,128,0,1,fp8,fp8,0,0.2701866626739502
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,float16,0,0.29173866907755536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,128,0,1,float16,fp8,0,0.2911360065142314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,2,128,0,1,fp8,fp8,0,0.2751413385073344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,float16,0,0.30672534306844074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,128,0,1,float16,fp8,0,0.30486400922139484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,8,4,128,0,1,fp8,fp8,0,0.2885119915008545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,float16,0,0.1792693336804708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,128,0,1,float16,fp8,0,0.17454399665196738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,8,128,0,1,fp8,fp8,0,0.1691946585973104
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,float16,0,0.15454399585723877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,128,0,1,float16,fp8,0,0.15204266707102457
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,1,128,0,1,fp8,fp8,0,0.14436266819636026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,float16,0,0.1578879952430725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,128,0,1,float16,fp8,0,0.15684266885121664
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,2,128,0,1,fp8,fp8,0,0.14832533399264017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,float16,0,0.16571733355522156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,128,0,1,float16,fp8,0,0.16541866461435953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,8,4,128,0,1,fp8,fp8,0,0.1564959983030955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,fp8,0,0.09962667028109233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,128,0,1,fp8,fp8,0,0.09676266709963481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,float16,0,0.08643200000127156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,8,128,0,1,float16,float16,0,0.1036959985891978
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,128,0,1,float16,fp8,0,0.08449600140253703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,1,128,0,1,fp8,fp8,0,0.07787733276685078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,float16,0,0.08650133013725281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,128,0,1,float16,fp8,0,0.08734400073687236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,2,128,0,1,fp8,fp8,0,0.08338133494059245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,float16,0,0.09311999877293904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,128,0,1,float16,fp8,0,0.09174933036168416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,float16,0,0.055973331133524575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,8,4,128,0,1,fp8,fp8,0,0.09116266171137492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,128,0,1,float16,fp8,0,0.05478399991989136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,8,128,0,1,fp8,fp8,0,0.055488000313440956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,float16,0,0.053413331508636475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,128,0,1,fp8,fp8,0,0.04794133206208547
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,1,128,0,1,float16,fp8,0,0.052986666560173035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,float16,0,0.052058666944503784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,128,0,1,float16,fp8,0,0.05377600093682607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,2,128,0,1,fp8,fp8,0,0.049402669072151184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,fp8,0,0.05429333448410034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,128,0,1,fp8,fp8,0,0.0518453319867452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,8,4,128,0,1,float16,float16,0,0.05506666501363119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,float16,0,0.03595199932654699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,128,0,1,float16,fp8,0,0.03572800010442734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,8,128,0,1,fp8,fp8,0,0.034314667185147606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,float16,0,0.03428266694148382
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,128,0,1,float16,fp8,0,0.035743998984495796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,1,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,float16,0,0.0352960005402565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,128,0,1,float16,fp8,0,0.034186666210492454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,2,128,0,1,fp8,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,fp8,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,128,0,1,float16,float16,0,0.03559466699759165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,8,4,128,0,1,fp8,fp8,0,0.035936000446478523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,float16,0,0.02958933264017105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,128,0,1,float16,fp8,0,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,8,128,0,1,fp8,fp8,0,0.029466666281223297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,fp8,0,0.029530666768550873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,1,128,0,1,float16,float16,0,0.029706666866938274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,float16,0,0.029359998802344005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,128,0,1,fp8,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,2,128,0,1,float16,fp8,0,0.02926933268706004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,float16,0,0.0310506671667099
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,128,0,1,float16,fp8,0,0.029829333225886028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,8,4,128,0,1,fp8,fp8,0,0.029477333029111225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,float16,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,128,0,1,float16,fp8,0,0.02720533311367035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,8,128,0,1,fp8,fp8,0,0.025616000096003216
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,float16,0,0.0271519993742307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,128,0,1,float16,fp8,0,0.027658666173617046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,1,128,0,1,fp8,fp8,0,0.025279998779296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,float16,0,0.02643733223279317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,128,0,1,float16,fp8,0,0.027269333600997925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,2,128,0,1,fp8,fp8,0,0.02518933266401291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,float16,0,0.027562665442625683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,128,0,1,float16,fp8,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,8,4,128,0,1,fp8,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,128,0,1,float16,fp8,0,0.47038400173187256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,128,0,1,float16,float16,0,0.4751573403676351
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,1,128,0,1,fp8,fp8,0,0.4578666687011719
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,128,0,1,float16,float16,0,0.4885066747665405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,128,0,1,fp8,fp8,0,0.46851734320322674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,2,128,0,1,float16,fp8,0,0.4834933280944824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,128,0,1,float16,float16,0,0.5176159938176473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,128,0,1,float16,fp8,0,0.5095040003458658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,8,4,128,0,1,fp8,fp8,0,0.4965866804122925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,128,0,1,float16,float16,0,0.28809599081675213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,128,0,1,float16,fp8,0,0.28246933221817017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,8,128,0,1,fp8,fp8,0,0.28246933221817017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,128,0,1,float16,float16,0,0.24688533941904703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,128,0,1,float16,fp8,0,0.24460800488789877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,1,128,0,1,fp8,fp8,0,0.23717333873112997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,128,0,1,float16,float16,0,0.2550719976425171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,128,0,1,float16,fp8,0,0.25177067518234253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,2,128,0,1,fp8,fp8,0,0.24337067206700644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,128,0,1,float16,float16,0,0.2690560022989909
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,128,0,1,float16,fp8,0,0.2643946607907613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,8,4,128,0,1,fp8,fp8,0,0.2574399908383687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,128,0,1,float16,float16,0,0.15641066431999207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,128,0,1,float16,fp8,0,0.15269333124160767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,8,128,0,1,fp8,fp8,0,0.1504746675491333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,128,0,1,float16,float16,0,0.1349493364493052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,128,0,1,float16,fp8,0,0.13108799854914346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,1,128,0,1,fp8,fp8,0,0.12798399726549783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,128,0,1,float16,float16,0,0.13782399892807007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,128,0,1,float16,fp8,0,0.13635733723640442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,2,128,0,1,fp8,fp8,0,0.13136000434557596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,128,0,1,float16,float16,0,0.1455573340257009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,128,0,1,float16,fp8,0,0.14430399735768637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,8,4,128,0,1,fp8,fp8,0,0.13707733154296875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,128,0,1,float16,float16,0,0.08986133337020874
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,128,0,1,float16,fp8,0,0.0869706670443217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,8,128,0,1,fp8,fp8,0,0.08570667107899983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,128,0,1,float16,float16,0,0.0742986649274826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,128,0,1,fp8,fp8,0,0.06875200072924297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,128,0,1,float16,float16,0,0.0767680009206136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,1,128,0,1,float16,fp8,0,0.07306133210659027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,128,0,1,float16,fp8,0,0.07486400008201599
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,2,128,0,1,fp8,fp8,0,0.07239466905593872
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,128,0,1,float16,float16,0,0.08189333478609721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,128,0,1,fp8,fp8,0,0.07854933540026347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,8,4,128,0,1,float16,fp8,0,0.08029866715272267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,128,0,1,float16,float16,0,0.04783466458320618
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,128,0,1,float16,fp8,0,0.04825599988301595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,8,128,0,1,fp8,fp8,0,0.04814933240413666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,128,0,1,float16,float16,0,0.04538666705290476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,128,0,1,float16,fp8,0,0.04584000011285146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,1,128,0,1,fp8,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,128,0,1,float16,float16,0,0.04601066807905833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,128,0,1,fp8,fp8,0,0.04188799858093262
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,128,0,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,2,128,0,1,float16,fp8,0,0.04674133161703745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,128,0,1,float16,fp8,0,0.04622933268547058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,128,0,1,float16,float16,0,0.03189333279927572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,8,4,128,0,1,fp8,fp8,0,0.044549331068992615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,128,0,1,fp8,fp8,0,0.030720000465710957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,8,128,0,1,float16,fp8,0,0.03219199925661087
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,128,0,1,float16,float16,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,128,0,1,float16,fp8,0,0.029311999678611755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,1,128,0,1,fp8,fp8,0,0.028192001084486645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,128,0,1,float16,float16,0,0.030533333619435627
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,128,0,1,float16,fp8,0,0.03108799954255422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,2,128,0,1,fp8,fp8,0,0.029359998802344005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,128,0,1,float16,float16,0,0.03128000100453695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,128,0,1,float16,fp8,0,0.031727999448776245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,8,4,128,0,1,fp8,fp8,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,128,0,1,float16,float16,0,0.025173333783944447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,128,0,1,float16,fp8,0,0.025519999365011852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,8,128,0,1,fp8,fp8,0,0.023525332411130268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,128,0,1,float16,float16,0,0.025040000677108765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,128,0,1,float16,fp8,0,0.025050667424996693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,1,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,128,0,1,float16,float16,0,0.025061334172884624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,128,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,2,128,0,1,fp8,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,128,0,1,float16,float16,0,0.02531733363866806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,128,0,1,float16,fp8,0,0.02517866591612498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,8,4,128,0,1,fp8,fp8,0,0.02404266595840454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,128,0,1,float16,float16,0,0.02316266546646754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,128,0,1,float16,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,8,128,0,1,fp8,fp8,0,0.02162133405605952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,128,0,1,float16,fp8,0,0.023226665953795116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,1,128,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,128,0,1,float16,float16,0,0.021557333568731945
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,2,128,0,1,fp8,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,128,0,1,float16,float16,0,0.02130666623512904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,128,0,1,float16,fp8,0,0.022416000564893086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,8,4,128,0,1,fp8,fp8,0,0.021029333273569744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,128,0,1,float16,float16,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,128,0,1,fp8,fp8,0,0.019797333826621372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,128,0,1,float16,float16,0,0.021226666867733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,8,128,0,1,float16,fp8,0,0.02204799900452296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,128,0,1,float16,fp8,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,1,128,0,1,fp8,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,128,0,1,float16,float16,0,0.02179733415444692
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,128,0,1,float16,fp8,0,0.021418665846188862
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,2,128,0,1,fp8,fp8,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,128,0,1,float16,float16,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,128,0,1,float16,fp8,0,0.021568000316619873
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,8,4,128,0,1,fp8,fp8,0,0.021018666525681812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,128,0,1,float16,float16,0,0.21826666593551636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,128,0,1,float16,fp8,0,0.21876800060272217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,1,128,0,1,fp8,fp8,0,0.22273067633310953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,128,0,1,float16,float16,0,0.22744532426198324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,128,0,1,float16,fp8,0,0.22237332661946616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,2,128,0,1,fp8,fp8,0,0.22548800706863403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,128,0,1,float16,float16,0,0.24754667282104492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,128,0,1,float16,fp8,0,0.24152000745137533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,8,4,128,0,1,fp8,fp8,0,0.2384106715520223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,128,0,1,float16,fp8,0,0.142384002606074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,128,0,1,float16,float16,0,0.14441600441932678
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,8,128,0,1,fp8,fp8,0,0.14260799686113992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,128,0,1,float16,float16,0,0.1176479955514272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,128,0,1,float16,fp8,0,0.116565336783727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,1,128,0,1,fp8,fp8,0,0.11764267086982727
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,128,0,1,float16,float16,0,0.12198399504025777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,128,0,1,float16,fp8,0,0.12058666348457336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,2,128,0,1,fp8,fp8,0,0.11987732847531636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,128,0,1,float16,float16,0,0.13317867120107016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,128,0,1,float16,fp8,0,0.13087999820709229
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,8,4,128,0,1,fp8,fp8,0,0.12894399960835776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,128,0,1,float16,float16,0,0.08449600140253703
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,128,0,1,float16,fp8,0,0.08123733103275299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,8,128,0,1,fp8,fp8,0,0.08167466521263123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,128,0,1,float16,float16,0,0.06844800213972728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,128,0,1,float16,fp8,0,0.06679466863473256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,1,128,0,1,fp8,fp8,0,0.06448000172773997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,128,0,1,float16,float16,0,0.07049066821734111
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,128,0,1,float16,fp8,0,0.06849066913127899
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,2,128,0,1,fp8,fp8,0,0.06757333377997081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,128,0,1,float16,float16,0,0.07533866663773854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,128,0,1,float16,fp8,0,0.07237866520881653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,8,4,128,0,1,fp8,fp8,0,0.07512533167997996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,128,0,1,float16,float16,0,0.04320533573627472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,128,0,1,float16,fp8,0,0.04194133480389913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,8,128,0,1,fp8,fp8,0,0.04414933423201243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,128,0,1,float16,float16,0,0.03980266551176707
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,128,0,1,float16,fp8,0,0.039962666730086006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,1,128,0,1,fp8,fp8,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,128,0,1,float16,float16,0,0.04159466673930486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,128,0,1,float16,float16,0,0.0421066681543986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,128,0,1,float16,fp8,0,0.0399893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,2,128,0,1,fp8,fp8,0,0.039674667020638786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,128,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,128,0,1,float16,float16,0,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,8,4,128,0,1,fp8,fp8,0,0.041946664452552795
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,128,0,1,float16,fp8,0,0.029674666623274486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,8,128,0,1,fp8,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,128,0,1,float16,fp8,0,0.0273333340883255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,1,128,0,1,fp8,fp8,0,0.027690666417280834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,128,0,1,float16,float16,0,0.02754133443037669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,128,0,1,float16,fp8,0,0.027669332921504974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,2,128,0,1,fp8,fp8,0,0.027258666853109997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,128,0,1,fp8,fp8,0,0.02849599967400233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,128,0,1,float16,float16,0,0.023589332898457844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,8,4,128,0,1,float16,fp8,0,0.027642667293548584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,128,0,1,float16,fp8,0,0.023989332218964893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,8,128,0,1,fp8,fp8,0,0.022384000321229298
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,1,128,0,1,fp8,fp8,0,0.021242665747801464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,128,0,1,float16,float16,0,0.02350933353106181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,128,0,1,float16,fp8,0,0.02214933435122172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,2,128,0,1,fp8,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,128,0,1,float16,fp8,0,0.02348266790310542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,128,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,8,4,128,0,1,fp8,fp8,0,0.021359999974568684
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,128,0,1,float16,float16,0,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,128,0,1,float16,fp8,0,0.021301334102948506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,8,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,128,0,1,float16,float16,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,1,128,0,1,fp8,fp8,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,128,0,1,float16,float16,0,0.021066665649414062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,128,0,1,float16,fp8,0,0.021114667256673176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,2,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,128,0,1,float16,float16,0,0.019285333653291065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,128,0,1,float16,fp8,0,0.019695999721686046
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,8,4,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,128,0,1,float16,float16,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,128,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,8,128,0,1,fp8,fp8,0,0.01720000058412552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,128,0,1,float16,float16,0,0.019061333189407986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,128,0,1,float16,fp8,0,0.01907733331123988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,1,128,0,1,fp8,fp8,0,0.018735999862353008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,128,0,1,float16,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,2,128,0,1,fp8,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,128,0,1,float16,float16,0,0.01953599974513054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,128,0,1,float16,fp8,0,0.01923199991385142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,128,0,1,float16,float16,0,0.01871466636657715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,8,4,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,128,0,1,float16,fp8,0,0.019573333362738293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,8,128,0,1,fp8,fp8,0,0.01754666616519292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,128,0,1,float16,float16,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,128,0,1,float16,fp8,0,0.019424000134070713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,1,128,0,1,fp8,fp8,0,0.017594666530688603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,128,0,1,float16,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,2,128,0,1,fp8,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,128,0,1,float16,float16,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,128,0,1,float16,fp8,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,8,4,128,0,1,fp8,fp8,0,0.018933333456516266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,128,0,1,float16,float16,0,0.12602133552233377
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,128,0,1,fp8,fp8,0,0.12762133280436197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,1,128,0,1,float16,fp8,0,0.12734933694203696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,128,0,1,float16,float16,0,0.12894933422406515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,128,0,1,float16,fp8,0,0.12794133027394614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,2,128,0,1,fp8,fp8,0,0.1301866670449575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,128,0,1,float16,float16,0,0.13620799779891968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,128,0,1,float16,fp8,0,0.1349013348420461
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,8,4,128,0,1,fp8,fp8,0,0.13821867108345032
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,128,0,1,float16,float16,0,0.08291199803352356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,128,0,1,float16,fp8,0,0.08039466540018718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,8,128,0,1,fp8,fp8,0,0.08402666449546814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,128,0,1,float16,float16,0,0.07067200044790904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,128,0,1,float16,fp8,0,0.0689279983441035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,1,128,0,1,fp8,fp8,0,0.06651733318964641
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,128,0,1,float16,fp8,0,0.0708426684141159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,128,0,1,float16,float16,0,0.07023466626803081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,2,128,0,1,fp8,fp8,0,0.07149866720040639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,128,0,1,float16,float16,0,0.07472533484299977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,128,0,1,float16,fp8,0,0.0745066652695338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,8,4,128,0,1,fp8,fp8,0,0.07717866698900859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,128,0,1,float16,float16,0,0.04586666822433472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,128,0,1,float16,fp8,0,0.04541333516438802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,8,128,0,1,fp8,fp8,0,0.045850664377212524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,128,0,1,float16,float16,0,0.04389866689840952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,128,0,1,float16,fp8,0,0.04308266441027323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,1,128,0,1,fp8,fp8,0,0.03994666785001755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,128,0,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,128,0,1,float16,fp8,0,0.04165333261092504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,2,128,0,1,fp8,fp8,0,0.041482667128245033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,128,0,1,float16,float16,0,0.0436106671889623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,128,0,1,float16,fp8,0,0.043738668163617454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,128,0,1,float16,float16,0,0.02956799914439519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,8,4,128,0,1,fp8,fp8,0,0.044165333112080894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,128,0,1,float16,fp8,0,0.029845332105954487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,8,128,0,1,fp8,fp8,0,0.02978666623433431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,128,0,1,float16,float16,0,0.029493334392706554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,128,0,1,float16,fp8,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,1,128,0,1,fp8,fp8,0,0.028517333169778187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,128,0,1,float16,float16,0,0.028746667007605236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,128,0,1,float16,fp8,0,0.029696000119050343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,2,128,0,1,fp8,fp8,0,0.02888533224662145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,128,0,1,float16,float16,0,0.029093332588672638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,128,0,1,float16,fp8,0,0.02972800036271413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,8,4,128,0,1,fp8,fp8,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,128,0,1,float16,fp8,0,0.021082667013009388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,8,128,0,1,fp8,fp8,0,0.021386665602525074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,1,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,128,0,1,float16,float16,0,0.021269333859284718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,2,128,0,1,fp8,fp8,0,0.01982933282852173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,128,0,1,float16,float16,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,8,4,128,0,1,fp8,fp8,0,0.02128000060717265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,128,0,1,float16,fp8,0,0.017194667210181553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,128,0,1,fp8,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,128,0,1,float16,float16,0,0.018629333625237148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,8,128,0,1,float16,float16,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,128,0,1,float16,fp8,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,128,0,1,float16,float16,0,0.01899733394384384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,128,0,1,float16,fp8,0,0.0183146670460701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,2,128,0,1,fp8,fp8,0,0.0173333336909612
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,128,0,1,float16,fp8,0,0.018757333358128864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,128,0,1,float16,float16,0,0.017418666432301205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,8,4,128,0,1,fp8,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,128,0,1,float16,float16,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,8,128,0,1,fp8,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,128,0,1,float16,float16,0,0.017808000246683758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,1,128,0,1,fp8,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,128,0,1,float16,float16,0,0.01670933390657107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,128,0,1,float16,float16,0,0.01748266691962878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,128,0,1,fp8,fp8,0,0.016719999412695568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,2,128,0,1,float16,fp8,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,8,4,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,128,0,1,float16,float16,0,0.016458666572968166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,8,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,128,0,1,float16,float16,0,0.015962666521469753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,1,128,0,1,fp8,fp8,0,0.016282666474580765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,128,0,1,float16,float16,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,2,128,0,1,fp8,fp8,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,128,0,1,float16,float16,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,8,4,128,0,1,fp8,fp8,0,0.016410666207472484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,8,128,0,1,fp8,fp8,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,128,0,1,float16,fp8,0,0.016677333662907284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,2,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,128,0,1,float16,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,8,4,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,128,0,1,float16,float16,0,0.0888320008913676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,128,0,1,float16,fp8,0,0.08687999844551086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,1,128,0,1,fp8,fp8,0,0.08221866687138875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,128,0,1,float16,float16,0,0.08920533458391826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,128,0,1,float16,fp8,0,0.08725333213806152
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,2,128,0,1,fp8,fp8,0,0.08646933237711589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,128,0,1,float16,float16,0,0.09118400017420451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,128,0,1,fp8,fp8,0,0.09202667077382405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,8,4,128,0,1,float16,fp8,0,0.0913866659005483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,128,0,1,float16,float16,0,0.052255998055140175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,128,0,1,float16,fp8,0,0.05184000233809153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,8,128,0,1,fp8,fp8,0,0.05268799761931101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,128,0,1,float16,float16,0,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,128,0,1,float16,fp8,0,0.04996266464392344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,1,128,0,1,fp8,fp8,0,0.04577066500981649
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,128,0,1,float16,float16,0,0.05003199974695841
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,128,0,1,fp8,fp8,0,0.0479360024134318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,2,128,0,1,float16,fp8,0,0.04970133304595947
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,128,0,1,float16,float16,0,0.04984533290068308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,128,0,1,float16,float16,0,0.03486400097608566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,128,0,1,float16,fp8,0,0.04994133114814758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,8,4,128,0,1,fp8,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,128,0,1,float16,fp8,0,0.03401066611210505
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,8,128,0,1,fp8,fp8,0,0.03355200091997782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,128,0,1,float16,float16,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,128,0,1,fp8,fp8,0,0.03163733333349228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,1,128,0,1,float16,fp8,0,0.033488000432650246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,128,0,1,float16,float16,0,0.03388266762097677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,128,0,1,float16,fp8,0,0.03398933261632919
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,2,128,0,1,fp8,fp8,0,0.0314026673634847
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,128,0,1,float16,float16,0,0.03486400097608566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,128,0,1,float16,fp8,0,0.035114665826161705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,8,4,128,0,1,fp8,fp8,0,0.0336053321758906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,128,0,1,float16,float16,0,0.024101334313551586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,128,0,1,float16,fp8,0,0.024010665714740753
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,8,128,0,1,fp8,fp8,0,0.023007998863856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,128,0,1,float16,float16,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,128,0,1,float16,fp8,0,0.023354666928450268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,1,128,0,1,fp8,fp8,0,0.023237332701683044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,128,0,1,float16,float16,0,0.023546665906906128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,128,0,1,float16,fp8,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,2,128,0,1,fp8,fp8,0,0.023152001202106476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,128,0,1,float16,float16,0,0.023210667073726654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,8,4,128,0,1,fp8,fp8,0,0.023232000569502514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,128,0,1,float16,float16,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,128,0,1,float16,fp8,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,8,128,0,1,fp8,fp8,0,0.017488000293572743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,128,0,1,float16,float16,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,1,128,0,1,float16,fp8,0,0.017845333864291508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,128,0,1,float16,float16,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,2,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,128,0,1,float16,fp8,0,0.016895999511082966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,128,0,1,float16,float16,0,0.017818666994571686
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,8,4,128,0,1,fp8,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,128,0,1,float16,fp8,0,0.018570666511853535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,8,128,0,1,fp8,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,128,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,128,0,1,float16,fp8,0,0.017349333812793095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,2,128,0,1,float16,float16,0,0.01754133279124896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,8,4,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,128,0,1,fp8,fp8,0,0.016885332763195038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,8,128,0,1,float16,fp8,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,128,0,1,fp8,fp8,0,0.016741332908471424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,1,128,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,128,0,1,float16,float16,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,2,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,128,0,1,float16,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,128,0,1,float16,float16,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,8,4,128,0,1,fp8,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,128,0,1,float16,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,8,128,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,128,0,1,float16,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,1,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,128,0,1,float16,float16,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,2,128,0,1,float16,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,8,4,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,128,0,1,float16,float16,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,8,128,0,1,fp8,fp8,0,0.01611199975013733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,1,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,128,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,2,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,128,0,1,float16,float16,0,0.016314666718244553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,128,0,1,float16,fp8,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,8,4,128,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,128,0,1,float16,fp8,0,0.06599999964237213
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,128,0,1,float16,float16,0,0.06663466493288676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,1,128,0,1,fp8,fp8,0,0.0625600020090739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,128,0,1,float16,float16,0,0.06633600095907848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,128,0,1,float16,fp8,0,0.06648533542950948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,2,128,0,1,fp8,fp8,0,0.06282666822274525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,128,0,1,float16,float16,0,0.06826133529345195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,128,0,1,fp8,fp8,0,0.06646400193373363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,8,4,128,0,1,float16,fp8,0,0.06840000053246816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,128,0,1,float16,float16,0,0.041738669077555336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,128,0,1,float16,fp8,0,0.04180799921353658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,8,128,0,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,128,0,1,float16,float16,0,0.04136000076929728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,128,0,1,float16,fp8,0,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,1,128,0,1,fp8,fp8,0,0.037920000652472176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,128,0,1,float16,float16,0,0.03987200061480204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,128,0,1,float16,fp8,0,0.04019733270009359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,2,128,0,1,fp8,fp8,0,0.03762666632731756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,128,0,1,float16,float16,0,0.04190400242805481
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,128,0,1,float16,float16,0,0.029343999922275543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,128,0,1,float16,fp8,0,0.041434665520985924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,8,4,128,0,1,fp8,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,128,0,1,float16,fp8,0,0.029157333076000214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,8,128,0,1,fp8,fp8,0,0.027327999472618103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,128,0,1,float16,float16,0,0.02733866622050603
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,128,0,1,float16,fp8,0,0.027621333797772724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,1,128,0,1,fp8,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,128,0,1,float16,float16,0,0.027306665976842243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,128,0,1,float16,fp8,0,0.02741333345572154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,2,128,0,1,fp8,fp8,0,0.027429332335789997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,128,0,1,float16,float16,0,0.02739199995994568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,8,4,128,0,1,fp8,fp8,0,0.02749866743882497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,128,0,1,float16,fp8,0,0.021007999777793884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,8,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,128,0,1,float16,float16,0,0.020853333175182343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,128,0,1,float16,fp8,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,1,128,0,1,fp8,fp8,0,0.019482667247454327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,128,0,1,float16,float16,0,0.019866666446129482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,128,0,1,float16,fp8,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,2,128,0,1,fp8,fp8,0,0.019391999890406925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,128,0,1,float16,float16,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,8,4,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,128,0,1,float16,fp8,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,128,0,1,float16,float16,0,0.01699200024207433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,1,128,0,1,fp8,fp8,0,0.016208000481128693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,128,0,1,float16,float16,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,2,128,0,1,fp8,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,128,0,1,float16,float16,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,128,0,1,float16,fp8,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,8,4,128,0,1,fp8,fp8,0,0.01777600000301997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,8,128,0,1,fp8,fp8,0,0.015482666591803232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,128,0,1,fp8,fp8,0,0.016352000335852306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,1,128,0,1,float16,fp8,0,0.01741333305835724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,2,128,0,1,fp8,fp8,0,0.016037333756685257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,128,0,1,float16,fp8,0,0.016821333517630894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,8,4,128,0,1,fp8,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,128,0,1,float16,fp8,0,0.016255999604860943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,8,128,0,1,fp8,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,128,0,1,float16,float16,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,1,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,128,0,1,float16,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,2,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,128,0,1,float16,float16,0,0.016842667013406754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,128,0,1,float16,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,8,4,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,128,0,1,float16,fp8,0,0.016122666498025257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,8,128,0,1,fp8,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,128,0,1,float16,fp8,0,0.01603200038274129
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,1,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,128,0,1,float16,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,2,128,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,128,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,128,0,1,float16,fp8,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,8,4,128,0,1,fp8,fp8,0,0.015893333901961643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,128,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,8,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,128,0,1,float16,float16,0,0.015157333264748255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,1,128,0,1,fp8,fp8,0,0.015664000064134598
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,128,0,1,float16,float16,0,0.014912000546852747
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,2,128,0,1,float16,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,8,4,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,128,0,1,float16,float16,0,0.05821333328882853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,128,0,1,float16,fp8,0,0.05624000231424967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,1,128,0,1,fp8,fp8,0,0.05599466462930044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,128,0,1,float16,float16,0,0.05819733440876007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,128,0,1,float16,fp8,0,0.05628266433874766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,2,128,0,1,fp8,fp8,0,0.055973331133524575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,128,0,1,float16,float16,0,0.058320000767707825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,128,0,1,float16,fp8,0,0.0582239975531896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,8,4,128,0,1,fp8,fp8,0,0.05788266658782959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,128,0,1,float16,float16,0,0.03549866626660029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,128,0,1,float16,fp8,0,0.03611200054486593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,8,128,0,1,fp8,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,128,0,1,float16,float16,0,0.03494933247566223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,128,0,1,float16,fp8,0,0.03561066587766012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,1,128,0,1,fp8,fp8,0,0.033589333295822144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,128,0,1,float16,float16,0,0.03538133452335993
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,128,0,1,float16,fp8,0,0.03548266738653183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,2,128,0,1,fp8,fp8,0,0.033802665770053864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,128,0,1,float16,float16,0,0.035690667728583016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,128,0,1,float16,fp8,0,0.035487999518712364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,8,4,128,0,1,fp8,fp8,0,0.033887999753157295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,128,0,1,float16,float16,0,0.025813333690166473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,128,0,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,8,128,0,1,fp8,fp8,0,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,128,0,1,float16,float16,0,0.025466665625572205
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,128,0,1,float16,fp8,0,0.025392000873883564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,1,128,0,1,fp8,fp8,0,0.02447466552257538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,128,0,1,float16,float16,0,0.025146665672461193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,128,0,1,float16,fp8,0,0.025194667279720306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,2,128,0,1,fp8,fp8,0,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,128,0,1,float16,float16,0,0.025253333151340485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,128,0,1,float16,fp8,0,0.025231999655564625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,128,0,1,float16,float16,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,8,4,128,0,1,fp8,fp8,0,0.02518400053183238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,128,0,1,float16,fp8,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,8,128,0,1,fp8,fp8,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,128,0,1,float16,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,1,128,0,1,fp8,fp8,0,0.019039999693632126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,128,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,128,0,1,float16,fp8,0,0.019199999670187633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,2,128,0,1,fp8,fp8,0,0.019727999965349834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,128,0,1,float16,fp8,0,0.019317333896954853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,8,4,128,0,1,fp8,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,8,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,128,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,128,0,1,float16,fp8,0,0.017269333203633625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,128,0,1,float16,fp8,0,0.017290666699409485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,1,128,0,1,fp8,fp8,0,0.015813333292802174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,2,128,0,1,fp8,fp8,0,0.015557333827018738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,128,0,1,float16,float16,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,128,0,1,float16,float16,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,8,4,128,0,1,fp8,fp8,0,0.017674667139848072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,8,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,128,0,1,float16,float16,0,0.014767999450365702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,128,0,1,fp8,fp8,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,1,128,0,1,float16,fp8,0,0.017445333302021027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,2,128,0,1,fp8,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,128,0,1,float16,float16,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,8,4,128,0,1,fp8,fp8,0,0.017136000096797943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,128,0,1,float16,float16,0,0.015552000453074774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,8,128,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,128,0,1,float16,fp8,0,0.016069332758585613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,1,128,0,1,fp8,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,128,0,1,float16,fp8,0,0.01666133354107539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,2,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,128,0,1,float16,float16,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,8,4,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,8,128,0,1,fp8,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,128,0,1,float16,fp8,0,0.015791999797026317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,1,128,0,1,fp8,fp8,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,128,0,1,fp8,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,2,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,128,0,1,float16,float16,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,128,0,1,float16,fp8,0,0.015578666081031164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,8,4,128,0,1,fp8,fp8,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,128,0,1,float16,float16,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,128,0,1,float16,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,8,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,128,0,1,float16,fp8,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,1,128,0,1,fp8,fp8,0,0.015669333438078564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,128,0,1,float16,fp8,0,0.01634666696190834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,2,128,0,1,fp8,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,128,0,1,float16,float16,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,8,4,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,0,0.04997866849104563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,0,0.04958933095137278
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,1,128,0,1,fp8,fp8,0,0.04587733248869578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,0,0.04966933528582255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,0,0.04981866478919983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,2,128,0,1,fp8,fp8,0,0.04597333570321401
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,0,0.05173333485921224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,0,0.0498933345079422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,0,0.031983998914559685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,8,4,128,0,1,fp8,fp8,0,0.045909335215886436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,0,0.03180799881617228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,8,128,0,1,fp8,fp8,0,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,0,0.031317333380381264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,0,0.03148799886306127
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,1,128,0,1,fp8,fp8,0,0.031002665559450786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,2,128,0,1,fp8,fp8,0,0.030085332691669464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,0,0.031397332747777305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,0,0.03151999910672506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,8,4,128,0,1,fp8,fp8,0,0.029391999046007793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,0,0.02330133318901062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,8,128,0,1,fp8,fp8,0,0.022837333381175995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,0,0.02309333284695943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,1,128,0,1,fp8,fp8,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,0,0.023168000082174938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,0,0.02327999969323476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,2,128,0,1,fp8,fp8,0,0.023311999936898548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,0,0.023269332945346832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,8,4,128,0,1,fp8,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,0,0.019002666076024372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,8,128,0,1,fp8,fp8,0,0.019258666783571243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,0,0.0233599990606308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,1,128,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,0,0.022970666488011677
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,2,128,0,1,fp8,fp8,0,0.018858666221300762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,8,4,128,0,1,fp8,fp8,0,0.019445333629846573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,0,0.016480000068744022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,8,128,0,1,fp8,fp8,0,0.017162666966517765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,128,0,1,fp8,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,0,0.018197332819302876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,8,4,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,0,0.01611199975013733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,8,128,0,1,fp8,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,1,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,0,0.01642666632930438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,2,128,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,0,0.016773333152135212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,8,4,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,8,128,0,1,fp8,fp8,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,1,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,0,0.017317333569129307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,2,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,8,4,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,8,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,0,0.016976000120242436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,1,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,0,0.01674666628241539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,2,128,0,1,fp8,fp8,0,0.01639466608564059
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,8,4,128,0,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,8,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,1,128,0,1,fp8,fp8,0,0.01544533297419548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,2,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,8,4,128,0,1,fp8,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,128,0,1,fp8,fp8,0,1.1866506735483806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,float16,0,1.407317320505778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,1,128,0,1,float16,fp8,0,1.4494239489237468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,128,0,1,fp8,fp8,0,1.2017227013905842
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,float16,0,1.4402987162272136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,4,2,128,0,1,float16,fp8,0,1.4270933469136555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,float16,0,0.7942026456197103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,128,0,1,float16,fp8,0,0.8109386761983236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,4,128,0,1,fp8,fp8,0,0.7105813026428223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,float16,0,0.765882651011149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,128,0,1,fp8,fp8,0,0.6530346473058065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,1,128,0,1,float16,fp8,0,0.7630506356557211
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,float16,0,0.7905546824137369
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,float16,0,0.45762133598327637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,128,0,1,float16,fp8,0,0.457535982131958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,128,0,1,fp8,fp8,0,0.6605600118637085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,4,2,128,0,1,float16,fp8,0,0.7818346818288168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,4,128,0,1,fp8,fp8,0,0.39603201548258465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,float16,0,0.429696003595988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,128,0,1,float16,fp8,0,0.43379731973012287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,1,128,0,1,fp8,fp8,0,0.3808213472366333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,float16,0,0.4434986511866252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,128,0,1,float16,fp8,0,0.43928531805674237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,4,2,128,0,1,fp8,fp8,0,0.38753068447113037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,float16,0,0.2817759911219279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,128,0,1,float16,fp8,0,0.28268800179163617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,4,128,0,1,fp8,fp8,0,0.25314666827519733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,float16,0,0.2757386763890584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,128,0,1,float16,fp8,0,0.2775999903678894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,1,128,0,1,fp8,fp8,0,0.24492265780766806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,float16,0,0.27703466018040973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,128,0,1,float16,fp8,0,0.2797759970029195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,4,2,128,0,1,fp8,fp8,0,0.24752533435821533
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,float16,0,0.8703040281931559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,128,0,1,fp8,fp8,0,0.7353706359863281
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,1,128,0,1,float16,fp8,0,0.8615893522898356
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,float16,0,0.8811679681142172
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,float16,0,0.5014826854070028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,128,0,1,float16,fp8,0,0.5095093250274658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,128,0,1,fp8,fp8,0,0.7463786602020264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,4,2,128,0,1,float16,fp8,0,0.8804213205973307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,4,128,0,1,fp8,fp8,0,0.4306826591491699
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,float16,0,0.4790453513463338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,128,0,1,float16,fp8,0,0.47723201910654706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,1,128,0,1,fp8,fp8,0,0.4164479970932007
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,float16,0,0.48109865188598633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,float16,0,0.29868799448013306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,128,0,1,fp8,fp8,0,0.4190933307011922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,4,2,128,0,1,float16,fp8,0,0.48659733931223553
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,128,0,1,float16,fp8,0,0.29760533571243286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,4,128,0,1,fp8,fp8,0,0.2604106664657593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,float16,0,0.27958933512369794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,128,0,1,float16,fp8,0,0.27684799830118817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,1,128,0,1,fp8,fp8,0,0.2441706657409668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,float16,0,0.28088533878326416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,128,0,1,float16,fp8,0,0.2817280093828837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,4,2,128,0,1,fp8,fp8,0,0.2507359981536865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,float16,0,0.1813546617825826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,128,0,1,float16,fp8,0,0.18488534291585287
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,4,128,0,1,fp8,fp8,0,0.16775999466578165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,float16,0,0.1821920077006022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,128,0,1,float16,fp8,0,0.18118399381637573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,1,128,0,1,fp8,fp8,0,0.1649386684099833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,float16,0,0.18310399850209555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,128,0,1,float16,fp8,0,0.1821546753247579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,4,2,128,0,1,fp8,fp8,0,0.1649386684099833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,float16,0,0.6438133319218954
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,128,0,1,fp8,fp8,0,0.5482346614201864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,1,128,0,1,float16,fp8,0,0.63645867506663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,float16,0,0.6541279951731364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,128,0,1,fp8,fp8,0,0.559221347173055
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,4,2,128,0,1,float16,fp8,0,0.6589546600977579
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,float16,0,0.3842933177947998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,128,0,1,fp8,fp8,0,0.3290613293647766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,4,128,0,1,float16,fp8,0,0.38355199495951336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,float16,0,0.35638399918874103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,128,0,1,float16,fp8,0,0.3537600040435791
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,1,128,0,1,fp8,fp8,0,0.31330132484436035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,128,0,1,fp8,fp8,0,0.31692800919214886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,fp8,0,0.3662186861038208
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,4,2,128,0,1,float16,float16,0,0.3640480041503906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,float16,0,0.224565327167511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,128,0,1,float16,fp8,0,0.2271626591682434
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,4,128,0,1,fp8,fp8,0,0.20366400480270386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,float16,0,0.21451733509699503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,128,0,1,fp8,fp8,0,0.19293334086736044
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,1,128,0,1,float16,fp8,0,0.2155146598815918
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,128,0,1,fp8,fp8,0,0.1964213252067566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,float16,0,0.2161440054575602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,4,2,128,0,1,float16,fp8,0,0.21892799933751425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,float16,0,0.14060266812642416
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,128,0,1,float16,fp8,0,0.14094932874043783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,4,128,0,1,fp8,fp8,0,0.1284160017967224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,float16,0,0.13800533612569174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,128,0,1,float16,fp8,0,0.13859732945760092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,1,128,0,1,fp8,fp8,0,0.12616533041000366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,float16,0,0.138154665629069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,128,0,1,float16,fp8,0,0.1402453382809957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,4,2,128,0,1,fp8,fp8,0,0.12811199824015299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,128,0,1,fp8,fp8,0,0.7153386274973551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,float16,0,0.8409972985585531
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,1,128,0,1,float16,fp8,0,0.836079994837443
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,float16,0,0.8539946873982748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,float16,0,0.48074134190877277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,128,0,1,fp8,fp8,0,0.7261599699656168
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,4,2,128,0,1,float16,fp8,0,0.8602773348490397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,128,0,1,float16,fp8,0,0.48843201001485187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,4,128,0,1,fp8,fp8,0,0.4110453526178996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,float16,0,0.4508746862411499
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,128,0,1,float16,fp8,0,0.4482453266779582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,1,128,0,1,fp8,fp8,0,0.3891199827194214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,float16,0,0.4545760154724121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,float16,0,0.27698665857315063
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,128,0,1,float16,fp8,0,0.4618186553319295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,4,2,128,0,1,fp8,fp8,0,0.39602665106455487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,128,0,1,float16,fp8,0,0.2770400047302246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,4,128,0,1,fp8,fp8,0,0.23972266912460327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,float16,0,0.25309866666793823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,128,0,1,float16,fp8,0,0.2489759922027588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,1,128,0,1,fp8,fp8,0,0.22421334187189737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,fp8,0,0.2589973409970601
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,128,0,1,float16,float16,0,0.25699732700983685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,4,2,128,0,1,fp8,fp8,0,0.23041599988937378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,float16,0,0.16160000363985697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,128,0,1,float16,fp8,0,0.16449600458145142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,4,128,0,1,fp8,fp8,0,0.15037866433461508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,float16,0,0.1588106652100881
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,128,0,1,float16,fp8,0,0.15918933351834616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,1,128,0,1,fp8,fp8,0,0.14366933703422546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,float16,0,0.16025066375732422
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,128,0,1,float16,fp8,0,0.16083733240763345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,4,2,128,0,1,fp8,fp8,0,0.14471466342608133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,float16,0,0.1095306674639384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,128,0,1,float16,fp8,0,0.11120532949765523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,4,128,0,1,fp8,fp8,0,0.10190932949384053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,float16,0,0.1074773371219635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,128,0,1,float16,fp8,0,0.10762133200963338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,1,128,0,1,fp8,fp8,0,0.09930666287740071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,float16,0,0.10812800129254659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,128,0,1,float16,fp8,0,0.10931199789047241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,4,2,128,0,1,fp8,fp8,0,0.10047466556231181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,float16,0,0.5310933192571005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,128,0,1,fp8,fp8,0,0.45846935113271076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,1,128,0,1,float16,fp8,0,0.5346933205922445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,float16,0,0.5450666745503744
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,float16,0,0.3168213367462158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,128,0,1,float16,fp8,0,0.32041066884994507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,128,0,1,float16,fp8,0,0.5462239980697632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,4,2,128,0,1,fp8,fp8,0,0.4694506724675496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,4,128,0,1,fp8,fp8,0,0.27426133553187054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,float16,0,0.2855573296546936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,128,0,1,fp8,fp8,0,0.2572106719017029
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,1,128,0,1,float16,fp8,0,0.2863786617914836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,float16,0,0.2988053361574809
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,float16,0,0.18456000089645386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,128,0,1,float16,fp8,0,0.29967466990152997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,4,2,128,0,1,fp8,fp8,0,0.26125866174697876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,128,0,1,float16,fp8,0,0.18622400363286337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,4,128,0,1,fp8,fp8,0,0.16226133704185486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,float16,0,0.1643893321355184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,128,0,1,float16,fp8,0,0.16218133767445883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,1,128,0,1,fp8,fp8,0,0.14709333578745523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,float16,0,0.16825600465138754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,128,0,1,float16,fp8,0,0.16514666875203451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,4,2,128,0,1,fp8,fp8,0,0.15446399648984274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,float16,0,0.11062933007876079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,128,0,1,float16,fp8,0,0.11065066854159038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,4,128,0,1,fp8,fp8,0,0.10128532846768697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,float16,0,0.10914132992426555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,128,0,1,float16,fp8,0,0.10732799768447876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,float16,0,0.10854933659235637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,1,128,0,1,fp8,fp8,0,0.09763200084368388
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,128,0,1,float16,fp8,0,0.11036266883214314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,4,2,128,0,1,fp8,fp8,0,0.09991466999053955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,float16,0,0.08692800005276997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,128,0,1,float16,fp8,0,0.0869760016600291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,4,128,0,1,fp8,fp8,0,0.07922666768232982
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,fp8,0,0.08505599697430928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,128,0,1,float16,float16,0,0.08450667063395183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,1,128,0,1,fp8,fp8,0,0.0773119976123174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,float16,0,0.084906667470932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,128,0,1,float16,fp8,0,0.08528000116348267
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,4,2,128,0,1,fp8,fp8,0,0.07832533121109009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,float16,0,0.5456480185190836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,128,0,1,float16,fp8,0,0.5541280110677084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,1,128,0,1,fp8,fp8,0,0.47908798853556317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,float16,0,0.5647039810816447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,128,0,1,float16,fp8,0,0.5714666843414307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,4,2,128,0,1,fp8,fp8,0,0.49405332406361896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,float16,0,0.3226613402366638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,128,0,1,float16,fp8,0,0.3275946577390035
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,4,128,0,1,fp8,fp8,0,0.28153600295384723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,128,0,1,fp8,fp8,0,0.25911466280619305
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,float16,0,0.28778666257858276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,1,128,0,1,float16,fp8,0,0.29119465748469037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,float16,0,0.3030400077501933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,128,0,1,float16,fp8,0,0.30347732702891034
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,4,2,128,0,1,fp8,fp8,0,0.2662293314933777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,float16,0,0.1824693282445272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,128,0,1,fp8,fp8,0,0.1623840034008026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,4,128,0,1,float16,fp8,0,0.18407466014226279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,float16,0,0.15796800454457602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,128,0,1,fp8,fp8,0,0.14603199561436972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,1,128,0,1,float16,fp8,0,0.15811199943224588
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,float16,0,0.16681599617004395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,128,0,1,fp8,fp8,0,0.152730663617452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,4,2,128,0,1,float16,fp8,0,0.16514666875203451
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,fp8,0,0.10725866754849751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,128,0,1,float16,float16,0,0.10507200161616008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,4,128,0,1,fp8,fp8,0,0.09903466701507568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,float16,0,0.09940266609191895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,128,0,1,float16,fp8,0,0.10150399804115295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,float16,0,0.10114133358001709
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,1,128,0,1,fp8,fp8,0,0.0918986697991689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,128,0,1,float16,fp8,0,0.10340799887975057
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,4,2,128,0,1,fp8,fp8,0,0.09131200114885966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,float16,0,0.0681333343187968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,128,0,1,fp8,fp8,0,0.06452266871929169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,4,128,0,1,float16,fp8,0,0.06913599868615468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,float16,0,0.06633600095907848
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,128,0,1,float16,fp8,0,0.06644799808661143
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,1,128,0,1,fp8,fp8,0,0.0622026671965917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,float16,0,0.0670826683441798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,128,0,1,float16,fp8,0,0.06654933094978333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,4,2,128,0,1,fp8,fp8,0,0.06242666641871134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,float16,0,0.06203199923038483
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,128,0,1,fp8,fp8,0,0.05781333148479462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,4,128,0,1,float16,fp8,0,0.06253866851329803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,fp8,0,0.06026133398214976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,128,0,1,float16,float16,0,0.06070399781068166
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,1,128,0,1,fp8,fp8,0,0.05808533231417338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,float16,0,0.06088533500830332
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,128,0,1,float16,fp8,0,0.061664000153541565
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,4,2,128,0,1,fp8,fp8,0,0.05669866502285004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,float16,0,0.3714826504389445
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,128,0,1,float16,fp8,0,0.3707306782404582
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,1,128,0,1,fp8,fp8,0,0.32553066809972125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,float16,0,0.38602133591969806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,128,0,1,float16,fp8,0,0.3867306709289551
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,float16,0,0.2229386568069458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,4,2,128,0,1,fp8,fp8,0,0.33796266714731854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,128,0,1,float16,fp8,0,0.22474133968353271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,4,128,0,1,fp8,fp8,0,0.19689067204793295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,float16,0,0.19461333751678467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,128,0,1,fp8,fp8,0,0.17916266123453775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,1,128,0,1,float16,fp8,0,0.19645333290100098
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,float16,0,0.2087679902712504
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,128,0,1,float16,fp8,0,0.20815465847651163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,float16,0,0.12526399890581766
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,4,2,128,0,1,fp8,fp8,0,0.18563199043273926
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,128,0,1,float16,fp8,0,0.1257866621017456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,4,128,0,1,fp8,fp8,0,0.1144533356030782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,float16,0,0.10746133327484131
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,128,0,1,float16,fp8,0,0.10985066493352254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,1,128,0,1,fp8,fp8,0,0.09946133693059285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,float16,0,0.10994666814804077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,128,0,1,float16,fp8,0,0.11172266801198323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,4,2,128,0,1,fp8,fp8,0,0.10595200459162395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,float16,0,0.07410133381684621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,128,0,1,float16,fp8,0,0.07452799876530965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,4,128,0,1,fp8,fp8,0,0.06871466835339864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,float16,0,0.070783997575442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,128,0,1,float16,fp8,0,0.07274666428565979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,1,128,0,1,fp8,fp8,0,0.06465066472689311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,float16,0,0.07225066423416138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,128,0,1,float16,fp8,0,0.07236266632874806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,4,2,128,0,1,fp8,fp8,0,0.06625600159168243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,float16,0,0.0554613322019577
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,128,0,1,float16,fp8,0,0.05407466491063436
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,4,128,0,1,fp8,fp8,0,0.05227200190226237
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,float16,0,0.05452266832192739
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,128,0,1,float16,fp8,0,0.05301866432030996
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,1,128,0,1,fp8,fp8,0,0.050069332122802734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,float16,0,0.054058666030565895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,128,0,1,float16,fp8,0,0.053818667928377785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,4,2,128,0,1,fp8,fp8,0,0.0499946673711141
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,float16,0,0.05006400247414907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,128,0,1,float16,fp8,0,0.05041599770387014
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,4,128,0,1,fp8,fp8,0,0.047610665361086525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,float16,0,0.04993600149949392
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,128,0,1,float16,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,1,128,0,1,fp8,fp8,0,0.0458186666170756
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,float16,0,0.05009066561857859
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,128,0,1,float16,fp8,0,0.05013333261013031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,4,2,128,0,1,fp8,fp8,0,0.04788800080617269
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,float16,0,0.43341867129007977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,128,0,1,fp8,fp8,0,0.38495465119679767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,1,128,0,1,float16,fp8,0,0.43190399805704754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,float16,0,0.4474879900614421
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,128,0,1,fp8,fp8,0,0.3923413356145223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,4,2,128,0,1,float16,fp8,0,0.44697598616282147
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,float16,0,0.25386132796605426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,128,0,1,fp8,fp8,0,0.22494399547576904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,4,128,0,1,float16,fp8,0,0.25269333521525067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,float16,0,0.22372267643610635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,128,0,1,float16,fp8,0,0.22538133462270102
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,1,128,0,1,fp8,fp8,0,0.20318400859832764
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,float16,0,0.2342026631037394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,128,0,1,fp8,fp8,0,0.2095253268877665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,4,2,128,0,1,float16,fp8,0,0.23753066857655844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,float16,0,0.13877333203951517
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,128,0,1,float16,fp8,0,0.1402400036652883
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,4,128,0,1,fp8,fp8,0,0.12641066312789917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,float16,0,0.11416000127792358
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,128,0,1,float16,fp8,0,0.11534399787584941
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,1,128,0,1,fp8,fp8,0,0.1086133321126302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,float16,0,0.12570666273434958
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,128,0,1,float16,fp8,0,0.12583466370900473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,4,2,128,0,1,fp8,fp8,0,0.11743467052777608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,float16,0,0.07458133498827617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,128,0,1,float16,fp8,0,0.07645866771539052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,4,128,0,1,fp8,fp8,0,0.07384000221888225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,float16,0,0.07189866900444031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,128,0,1,fp8,fp8,0,0.06482666730880737
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,1,128,0,1,float16,fp8,0,0.07233599821726482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,float16,0,0.07054399947325389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,128,0,1,float16,fp8,0,0.07387733459472656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,4,2,128,0,1,fp8,fp8,0,0.06634666522343953
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,float16,0,0.04806933303674062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,128,0,1,float16,fp8,0,0.04785599807898203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,4,128,0,1,fp8,fp8,0,0.0443200021982193
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,float16,0,0.04649066428343455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,128,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,1,128,0,1,fp8,fp8,0,0.043712000052134194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,float16,0,0.045994664231936135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,128,0,1,float16,fp8,0,0.04862933357556661
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,4,2,128,0,1,fp8,fp8,0,0.04596266647179922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,float16,0,0.041850666205088295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,128,0,1,float16,fp8,0,0.04162133236726125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,4,128,0,1,fp8,fp8,0,0.040106666584809623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,float16,0,0.04221333563327789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,128,0,1,float16,fp8,0,0.04160533348719279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,1,128,0,1,fp8,fp8,0,0.03856533269087473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,fp8,0,0.04177066683769226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,128,0,1,fp8,fp8,0,0.03979733337958654
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,4,2,128,0,1,float16,float16,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,float16,0,0.039488000174363456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,128,0,1,float16,fp8,0,0.039818666875362396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,4,128,0,1,fp8,fp8,0,0.037658666570981346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,fp8,0,0.03965866565704346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,128,0,1,float16,float16,0,0.03986666599909464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,float16,0,0.0397119993964831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,1,128,0,1,fp8,fp8,0,0.035775999228159584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,128,0,1,float16,fp8,0,0.03952533255020777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,4,2,128,0,1,fp8,fp8,0,0.03812266637881597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,float16,0,0.2897546688715617
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,128,0,1,fp8,fp8,0,0.2581226627031962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,1,128,0,1,float16,fp8,0,0.29206933577855426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,float16,0,0.2988160053888957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,float16,0,0.17473065853118896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,128,0,1,float16,fp8,0,0.17721599340438843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,128,0,1,fp8,fp8,0,0.26678399244944256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,4,2,128,0,1,float16,fp8,0,0.3024959961573283
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,4,128,0,1,fp8,fp8,0,0.1611840029557546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,float16,0,0.15294933319091797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,128,0,1,float16,fp8,0,0.15342400471369425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,1,128,0,1,fp8,fp8,0,0.1422719955444336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,float16,0,0.16312533617019653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,128,0,1,float16,fp8,0,0.16262400150299072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,float16,0,0.09715732932090759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,4,2,128,0,1,fp8,fp8,0,0.14549866318702698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,128,0,1,float16,fp8,0,0.09912000099817912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,4,128,0,1,fp8,fp8,0,0.09192533294359843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,float16,0,0.08267733454704285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,128,0,1,float16,fp8,0,0.08476266264915466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,1,128,0,1,fp8,fp8,0,0.07668800155321757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,float16,0,0.08619733651479085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,128,0,1,float16,fp8,0,0.08764800429344177
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,4,2,128,0,1,fp8,fp8,0,0.08282666901747386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,float16,0,0.05570666491985321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,128,0,1,float16,fp8,0,0.05807466804981232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,4,128,0,1,fp8,fp8,0,0.05394133428732554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,float16,0,0.054085334142049156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,128,0,1,float16,fp8,0,0.05589333176612854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,1,128,0,1,fp8,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,float16,0,0.05518400172392527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,128,0,1,float16,fp8,0,0.056405335664749146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,4,2,128,0,1,fp8,fp8,0,0.05182399849096934
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,128,0,1,float16,fp8,0,0.040207999447981514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,4,128,0,1,fp8,fp8,0,0.03753600021203359
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,float16,0,0.03948266555865606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,128,0,1,float16,fp8,0,0.039520000418027244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,float16,0,0.03972266614437103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,1,128,0,1,fp8,fp8,0,0.03580799947182337
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,128,0,1,float16,fp8,0,0.03993066648642222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,4,2,128,0,1,fp8,fp8,0,0.03732266773780187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,float16,0,0.03562666724125544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,128,0,1,float16,fp8,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,4,128,0,1,fp8,fp8,0,0.033439998825391136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,float16,0,0.03532266616821289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,128,0,1,float16,fp8,0,0.03535466641187668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,1,128,0,1,fp8,fp8,0,0.03310933212439219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,float16,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,128,0,1,float16,fp8,0,0.03544000039498011
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,4,2,128,0,1,fp8,fp8,0,0.033914667864640556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,float16,0,0.033770665526390076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,128,0,1,float16,fp8,0,0.03349333256483078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,4,128,0,1,fp8,fp8,0,0.03187733391920725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,float16,0,0.03365333378314972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,128,0,1,float16,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,1,128,0,1,fp8,fp8,0,0.031189332405726116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,float16,0,0.03355200091997782
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,128,0,1,float16,fp8,0,0.033615998923778534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,4,2,128,0,1,fp8,fp8,0,0.03121600051720937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,float16,0,0.2993280092875163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,128,0,1,float16,fp8,0,0.29730133215586346
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,1,128,0,1,fp8,fp8,0,0.29661866029103595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,float16,0,0.30929599205652875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,128,0,1,float16,fp8,0,0.30614399909973145
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,4,2,128,0,1,fp8,fp8,0,0.3024853269259135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,float16,0,0.17811733484268188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,128,0,1,float16,fp8,0,0.17466666301091513
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,4,128,0,1,fp8,fp8,0,0.17099199692408243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,float16,0,0.1588586668173472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,128,0,1,float16,fp8,0,0.15894400080045065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,1,128,0,1,fp8,fp8,0,0.15897066394488016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,float16,0,0.1660533348719279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,128,0,1,float16,fp8,0,0.1637493371963501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,4,2,128,0,1,fp8,fp8,0,0.1625493367513021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,float16,0,0.10366933544476827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,128,0,1,float16,fp8,0,0.10125866532325745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,4,128,0,1,fp8,fp8,0,0.09613866607348125
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,float16,0,0.0870293378829956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,128,0,1,float16,fp8,0,0.08493333061536153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,1,128,0,1,fp8,fp8,0,0.08270933230717976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,float16,0,0.09033067027727763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,128,0,1,fp8,fp8,0,0.09105066458384196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,4,2,128,0,1,float16,fp8,0,0.09062400460243225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,float16,0,0.05499200026194254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,128,0,1,float16,fp8,0,0.05558399856090546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,4,128,0,1,fp8,fp8,0,0.055999999245007835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,float16,0,0.05356266597906748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,128,0,1,float16,fp8,0,0.05218133330345154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,1,128,0,1,fp8,fp8,0,0.0480373352766037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,float16,0,0.054197331269582115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,128,0,1,float16,fp8,0,0.05426133175690969
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,float16,0,0.035786665976047516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,4,2,128,0,1,fp8,fp8,0,0.049973333875338234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,128,0,1,float16,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,128,0,1,float16,fp8,0,0.03575466573238373
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,4,128,0,1,fp8,fp8,0,0.03605333218971888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,1,128,0,1,fp8,fp8,0,0.03364266703526179
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,float16,0,0.035530666510264076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,128,0,1,float16,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,4,2,128,0,1,fp8,fp8,0,0.03387200087308884
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,float16,0,0.02922666569550832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,128,0,1,float16,fp8,0,0.029616000751654308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,4,128,0,1,fp8,fp8,0,0.028688001135985058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,float16,0,0.030063999195893604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,128,0,1,float16,fp8,0,0.029450667401154835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,float16,0,0.029578665892283123
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,1,128,0,1,fp8,fp8,0,0.027189334233601887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,128,0,1,float16,fp8,0,0.03186666717131933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,4,2,128,0,1,fp8,fp8,0,0.029365333418051403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,float16,0,0.02741866558790207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,128,0,1,float16,fp8,0,0.027242665489514668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,4,128,0,1,fp8,fp8,0,0.026687999566396076
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,float16,0,0.02735466758410136
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,128,0,1,float16,fp8,0,0.02593066543340683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,1,128,0,1,fp8,fp8,0,0.025370667378107708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,float16,0,0.02734400083621343
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,128,0,1,float16,fp8,0,0.026928000152111053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,4,2,128,0,1,fp8,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,float16,0,0.025472000241279602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,128,0,1,fp8,fp8,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,4,128,0,1,float16,fp8,0,0.027445333699385326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,float16,0,0.02535466601451238
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,128,0,1,fp8,fp8,0,0.023381332556406658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,1,128,0,1,float16,fp8,0,0.02537599951028824
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,float16,0,0.025578667720158894
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,128,0,1,fp8,fp8,0,0.023183998962243397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,4,2,128,0,1,float16,fp8,0,0.02510400116443634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,128,0,1,float16,float16,0,0.2566346724828084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,128,0,1,float16,fp8,0,0.25250667333602905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,1,128,0,1,fp8,fp8,0,0.2658560077349345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,128,0,1,float16,float16,0,0.2686079939206441
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,128,0,1,float16,fp8,0,0.2648959954579671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,4,2,128,0,1,fp8,fp8,0,0.27110934257507324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,128,0,1,float16,float16,0,0.15660799543062845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,128,0,1,float16,fp8,0,0.15466133753458658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,4,128,0,1,fp8,fp8,0,0.1532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,128,0,1,float16,float16,0,0.13657599687576294
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,128,0,1,float16,fp8,0,0.1346506675084432
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,1,128,0,1,fp8,fp8,0,0.14180800318717957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,128,0,1,float16,float16,0,0.14401066303253174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,128,0,1,float16,fp8,0,0.1418239971001943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,4,2,128,0,1,fp8,fp8,0,0.14506133397420248
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,128,0,1,float16,float16,0,0.0911253293355306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,128,0,1,float16,fp8,0,0.0869599978129069
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,4,128,0,1,fp8,fp8,0,0.08711466193199158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,128,0,1,float16,float16,0,0.07550933460394542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,128,0,1,float16,fp8,0,0.07372266550858815
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,1,128,0,1,fp8,fp8,0,0.07272533575693767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,128,0,1,float16,float16,0,0.07878933350245158
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,128,0,1,float16,fp8,0,0.0767146646976471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,4,2,128,0,1,fp8,fp8,0,0.07961600025494893
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,128,0,1,float16,float16,0,0.047040000557899475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,128,0,1,float16,fp8,0,0.04818133513132731
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,4,128,0,1,fp8,fp8,0,0.04790399968624115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,128,0,1,float16,float16,0,0.045610666275024414
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,128,0,1,float16,fp8,0,0.04437866806983948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,1,128,0,1,fp8,fp8,0,0.04199466605981191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,128,0,1,float16,float16,0,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,128,0,1,float16,fp8,0,0.04604266583919525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,4,2,128,0,1,fp8,fp8,0,0.04370133578777313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,128,0,1,float16,float16,0,0.03177600105603536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,128,0,1,float16,fp8,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,4,128,0,1,fp8,fp8,0,0.030080000559488933
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,128,0,1,float16,float16,0,0.03152533372243246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,128,0,1,float16,fp8,0,0.029274667302767437
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,1,128,0,1,fp8,fp8,0,0.02863466739654541
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,128,0,1,float16,float16,0,0.031632001201311745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,128,0,1,float16,fp8,0,0.031354665756225586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,4,2,128,0,1,fp8,fp8,0,0.029487999776999157
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,128,0,1,float16,float16,0,0.025631998976071674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,128,0,1,float16,fp8,0,0.025424001117547352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,4,128,0,1,fp8,fp8,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,128,0,1,float16,float16,0,0.026047999660174053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,128,0,1,float16,fp8,0,0.0252960001428922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,1,128,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,128,0,1,float16,float16,0,0.02516266703605652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,128,0,1,float16,fp8,0,0.025568000972270966
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,128,0,1,float16,fp8,0,0.023413332800070446
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,4,2,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,128,0,1,float16,float16,0,0.02144533395767212
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,128,0,1,float16,float16,0,0.023141334454218548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,128,0,1,float16,fp8,0,0.023034666975339253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,1,128,0,1,fp8,fp8,0,0.02109866589307785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,4,128,0,1,fp8,fp8,0,0.021477334201335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,128,0,1,float16,float16,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,128,0,1,float16,fp8,0,0.023157333334287006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,4,2,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,128,0,1,float16,float16,0,0.02141333371400833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,4,128,0,1,fp8,fp8,0,0.021338666478792827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,128,0,1,float16,float16,0,0.02142400046189626
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,1,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,128,0,1,float16,float16,0,0.021354667842388153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,128,0,1,float16,fp8,0,0.021498667697111767
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,4,2,128,0,1,fp8,fp8,0,0.020869334538777668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,128,0,1,float16,float16,0,0.021146667500336964
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,128,0,1,float16,float16,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,4,128,0,1,fp8,fp8,0,0.01932266727089882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,1,128,0,1,fp8,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,128,0,1,float16,float16,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,128,0,1,float16,fp8,0,0.021216000119845074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,4,2,128,0,1,fp8,fp8,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,128,0,1,float16,float16,0,0.12326400478680928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,128,0,1,float16,fp8,0,0.12325867017110188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,1,128,0,1,fp8,fp8,0,0.1351626714070638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,128,0,1,float16,float16,0,0.1291146675745646
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,128,0,1,float16,float16,0,0.08520000179608662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,128,0,1,float16,fp8,0,0.1267733375231425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,128,0,1,float16,fp8,0,0.08049599826335907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,4,2,128,0,1,fp8,fp8,0,0.1381119986375173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,4,128,0,1,fp8,fp8,0,0.08229866623878479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,128,0,1,float16,fp8,0,0.06797333558400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,128,0,1,float16,float16,0,0.0699946681658427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,1,128,0,1,fp8,fp8,0,0.07047466437021892
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,128,0,1,float16,float16,0,0.07214933137098949
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,128,0,1,float16,fp8,0,0.07062399884064992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,128,0,1,float16,float16,0,0.04363733530044556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,4,2,128,0,1,fp8,fp8,0,0.07712533573309581
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,128,0,1,float16,fp8,0,0.04205333193143209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,4,128,0,1,fp8,fp8,0,0.04326933125654856
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,128,0,1,float16,float16,0,0.0399893323580424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,128,0,1,float16,fp8,0,0.040287998815377556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,1,128,0,1,fp8,fp8,0,0.038149334490299225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,128,0,1,float16,float16,0,0.03989866624275843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,128,0,1,float16,fp8,0,0.039781334499518074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,4,2,128,0,1,fp8,fp8,0,0.039887999494870506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,128,0,1,float16,float16,0,0.0278613343834877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,128,0,1,float16,fp8,0,0.027930667002995808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,4,128,0,1,fp8,fp8,0,0.02884799987077713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,128,0,1,float16,float16,0,0.02756800005833308
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,128,0,1,float16,fp8,0,0.027535999814669292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,1,128,0,1,fp8,fp8,0,0.027674667537212372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,128,0,1,float16,float16,0,0.027530667682488758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,128,0,1,float16,fp8,0,0.028330666323502857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,4,2,128,0,1,fp8,fp8,0,0.027434666951497395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,128,0,1,float16,float16,0,0.023573334018389385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,128,0,1,float16,fp8,0,0.023344000180562336
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,4,128,0,1,fp8,fp8,0,0.023200000325838726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,128,0,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,128,0,1,float16,fp8,0,0.021381333470344543
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,1,128,0,1,fp8,fp8,0,0.022650666534900665
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,128,0,1,float16,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,4,2,128,0,1,fp8,fp8,0,0.021365332106749218
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,128,0,1,float16,float16,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,128,0,1,float16,fp8,0,0.019178666174411774
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,4,128,0,1,fp8,fp8,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,128,0,1,float16,fp8,0,0.021157334248224895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,128,0,1,float16,float16,0,0.0210506667693456
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,1,128,0,1,fp8,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,128,0,1,float16,float16,0,0.021162666380405426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,4,2,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,128,0,1,float16,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,128,0,1,float16,float16,0,0.019530666371186573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,4,128,0,1,fp8,fp8,0,0.018805333723624546
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,128,0,1,fp8,fp8,0,0.018629333625237148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,128,0,1,float16,float16,0,0.018778666853904724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,1,128,0,1,float16,fp8,0,0.018981333822011948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,128,0,1,float16,float16,0,0.01894933357834816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,128,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,4,2,128,0,1,fp8,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,128,0,1,float16,float16,0,0.018944000204404194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,128,0,1,float16,fp8,0,0.019189332922299702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,4,128,0,1,fp8,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,128,0,1,float16,float16,0,0.01971199984351794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,128,0,1,float16,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,1,128,0,1,fp8,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,128,0,1,float16,float16,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,128,0,1,float16,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,4,2,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,128,0,1,float16,float16,0,0.01930133377512296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,128,0,1,float16,fp8,0,0.019434666881958645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,4,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,128,0,1,float16,float16,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,128,0,1,float16,fp8,0,0.01934933289885521
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,1,128,0,1,fp8,fp8,0,0.01729600007335345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,128,0,1,float16,float16,0,0.01937066639463107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,128,0,1,float16,fp8,0,0.019215999792019527
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,4,2,128,0,1,fp8,fp8,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,128,0,1,float16,float16,0,0.07147733370463054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,128,0,1,float16,fp8,0,0.07066133121649425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,1,128,0,1,fp8,fp8,0,0.07192533214886983
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,128,0,1,float16,float16,0,0.07448000212510426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,128,0,1,float16,fp8,0,0.07222400108973186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,4,2,128,0,1,fp8,fp8,0,0.08075200021266937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,128,0,1,float16,float16,0,0.04419200122356415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,128,0,1,float16,fp8,0,0.04383466641108195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,4,128,0,1,fp8,fp8,0,0.04624533156553904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,128,0,1,float16,float16,0,0.04345599810282389
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,128,0,1,float16,fp8,0,0.043893332282702126
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,1,128,0,1,fp8,fp8,0,0.03999999910593033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,128,0,1,float16,float16,0,0.043562665581703186
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,128,0,1,float16,fp8,0,0.044218664367993675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,128,0,1,float16,float16,0,0.02959999938805898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,128,0,1,float16,fp8,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,4,2,128,0,1,fp8,fp8,0,0.041519999504089355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,128,0,1,float16,fp8,0,0.029301332930723827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,128,0,1,float16,float16,0,0.02829866607983907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,1,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,4,128,0,1,fp8,fp8,0,0.030224000414212544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,128,0,1,float16,float16,0,0.029317334294319153
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,128,0,1,float16,fp8,0,0.02889599899450938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,4,2,128,0,1,fp8,fp8,0,0.027093333502610523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,128,0,1,float16,float16,0,0.021776000658671062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,128,0,1,float16,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,4,128,0,1,fp8,fp8,0,0.02089600016673406
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,128,0,1,float16,fp8,0,0.02161066730817159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,1,128,0,1,fp8,fp8,0,0.020981334149837494
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,128,0,1,float16,float16,0,0.01978133370478948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,128,0,1,float16,fp8,0,0.021295999487241108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,4,2,128,0,1,fp8,fp8,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,128,0,1,float16,float16,0,0.019018666197856266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,128,0,1,float16,fp8,0,0.019066666563351948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,4,128,0,1,fp8,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,1,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,128,0,1,float16,float16,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,128,0,1,float16,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,4,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,128,0,1,float16,float16,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,4,128,0,1,fp8,fp8,0,0.017781333376963932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,128,0,1,float16,float16,0,0.016063999384641647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,128,0,1,float16,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,1,128,0,1,fp8,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,128,0,1,float16,fp8,0,0.017386666188637417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,4,2,128,0,1,fp8,fp8,0,0.017093333105246227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,4,128,0,1,fp8,fp8,0,0.01714133347074191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,128,0,1,float16,float16,0,0.01681600014368693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,128,0,1,float16,fp8,0,0.01700266698996226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,1,128,0,1,fp8,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,128,0,1,float16,fp8,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,4,2,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,4,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,128,0,1,float16,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,1,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,128,0,1,fp8,fp8,0,0.015376000354687372
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,4,2,128,0,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,128,0,1,float16,fp8,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,4,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,128,0,1,float16,float16,0,0.016949333250522614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,128,0,1,float16,fp8,0,0.01724799970785777
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,1,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,128,0,1,float16,fp8,0,0.016917333006858826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,4,2,128,0,1,fp8,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,128,0,1,float16,float16,0,0.04986133178075155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,128,0,1,float16,fp8,0,0.04993066688378652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,1,128,0,1,fp8,fp8,0,0.04801600178082784
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,128,0,1,float16,float16,0,0.051914667089780174
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,128,0,1,float16,fp8,0,0.051488002141316734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,4,2,128,0,1,fp8,fp8,0,0.05008533100287119
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,128,0,1,float16,float16,0,0.03555733213822047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,128,0,1,float16,fp8,0,0.03526400029659271
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,4,128,0,1,fp8,fp8,0,0.033626665671666466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,128,0,1,float16,float16,0,0.03339199970165888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,128,0,1,float16,fp8,0,0.03401600072781245
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,1,128,0,1,fp8,fp8,0,0.0320266659061114
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,128,0,1,float16,fp8,0,0.03399466723203659
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,128,0,1,float16,float16,0,0.03382933388153712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,4,2,128,0,1,fp8,fp8,0,0.031583999594052635
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,128,0,1,float16,float16,0,0.023130667706330616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,128,0,1,float16,fp8,0,0.023242667317390442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,4,128,0,1,fp8,fp8,0,0.02311466634273529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,128,0,1,float16,float16,0,0.022991999983787537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,128,0,1,float16,fp8,0,0.02319466571013133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,1,128,0,1,fp8,fp8,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,128,0,1,float16,float16,0,0.02309866746266683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,128,0,1,float16,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,4,2,128,0,1,fp8,fp8,0,0.021562665700912476
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,128,0,1,float16,float16,0,0.017498667041460674
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,128,0,1,float16,fp8,0,0.01897066707412402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,4,128,0,1,fp8,fp8,0,0.0189280000825723
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,128,0,1,float16,float16,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,1,128,0,1,fp8,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,128,0,1,float16,float16,0,0.01756799966096878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,4,2,128,0,1,fp8,fp8,0,0.017221332838137943
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,128,0,1,fp8,fp8,0,0.014896000425020853
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,4,128,0,1,float16,float16,0,0.01611199975013733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,128,0,1,float16,float16,0,0.016970666746298473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,1,128,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,128,0,1,float16,fp8,0,0.016938666502634685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,128,0,1,float16,float16,0,0.016127999871969223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,128,0,1,float16,float16,0,0.017114666601022083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,4,2,128,0,1,fp8,fp8,0,0.017184000462293625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,128,0,1,float16,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,128,0,1,float16,float16,0,0.016613333175579708
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,4,128,0,1,fp8,fp8,0,0.015989333391189575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,128,0,1,float16,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,1,128,0,1,fp8,fp8,0,0.016927999754746754
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,128,0,1,float16,float16,0,0.017258666455745697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,128,0,1,float16,fp8,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,4,2,128,0,1,fp8,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,128,0,1,float16,float16,0,0.016778666526079178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,128,0,1,float16,fp8,0,0.017082666357358296
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,4,128,0,1,fp8,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,128,0,1,float16,float16,0,0.016271999726692837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,128,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,1,128,0,1,fp8,fp8,0,0.015322666615247726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,4,2,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,128,0,1,float16,float16,0,0.015002666662136713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,128,0,1,float16,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,4,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,128,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,1,128,0,1,fp8,fp8,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,128,0,1,float16,float16,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,128,0,1,float16,fp8,0,0.017055999487638474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,4,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,128,0,1,float16,float16,0,0.015589332828919092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,4,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,128,0,1,float16,float16,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,128,0,1,float16,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,128,0,1,float16,float16,0,0.01570133368174235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,128,0,1,float16,fp8,0,0.014922666052977243
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,4,2,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,128,0,1,float16,float16,0,0.04188266893227895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,128,0,1,float16,fp8,0,0.04164266586303711
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,1,128,0,1,fp8,fp8,0,0.038933334251244865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,128,0,1,float16,float16,0,0.041834667325019836
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,128,0,1,float16,fp8,0,0.041877334316571556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,4,2,128,0,1,fp8,fp8,0,0.03845333307981491
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,128,0,1,float16,float16,0,0.027957332630952198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,128,0,1,float16,fp8,0,0.029258665939172108
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,4,128,0,1,fp8,fp8,0,0.027424000203609467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,128,0,1,float16,float16,0,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,128,0,1,float16,fp8,0,0.027888000011444092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,1,128,0,1,fp8,fp8,0,0.02739733209212621
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,128,0,1,float16,float16,0,0.028629332780838013
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,128,0,1,float16,fp8,0,0.02719466636578242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,4,2,128,0,1,fp8,fp8,0,0.027349332968393963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,128,0,1,float16,float16,0,0.019381333142518997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,128,0,1,float16,fp8,0,0.019487999379634857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,4,128,0,1,fp8,fp8,0,0.021429332594076794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,128,0,1,float16,float16,0,0.01921066641807556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,128,0,1,float16,fp8,0,0.01939733326435089
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,1,128,0,1,fp8,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,128,0,1,float16,float16,0,0.01947733387351036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,128,0,1,float16,fp8,0,0.01916266605257988
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,4,2,128,0,1,fp8,fp8,0,0.019226666539907455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,128,0,1,float16,float16,0,0.01704000060757001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,4,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,1,128,0,1,float16,fp8,0,0.01682666689157486
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,128,0,1,float16,fp8,0,0.017050666113694508
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,4,2,128,0,1,fp8,fp8,0,0.01685333376129468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,128,0,1,float16,float16,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,4,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,128,0,1,float16,float16,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,1,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,128,0,1,float16,float16,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,128,0,1,float16,fp8,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,4,2,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,128,0,1,float16,float16,0,0.015184000134468079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,128,0,1,float16,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,4,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,1,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,128,0,1,float16,fp8,0,0.017237332959969837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,4,2,128,0,1,fp8,fp8,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,4,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,128,0,1,float16,float16,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,128,0,1,float16,fp8,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,1,128,0,1,fp8,fp8,0,0.014901333798964819
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,128,0,1,float16,float16,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,4,2,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,128,0,1,float16,float16,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,128,0,1,float16,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,4,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,128,0,1,float16,float16,0,0.020629333953062694
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,128,0,1,float16,fp8,0,0.016021333634853363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,1,128,0,1,fp8,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,128,0,1,float16,float16,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,128,0,1,fp8,fp8,0,0.015658666690190632
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,4,2,128,0,1,float16,fp8,0,0.01563199982047081
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,128,0,1,float16,float16,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,128,0,1,float16,float16,0,0.014933332800865173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,4,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,128,0,1,float16,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,1,128,0,1,fp8,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,128,0,1,float16,float16,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,128,0,1,float16,fp8,0,0.015728000551462173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,4,2,128,0,1,fp8,fp8,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,128,0,1,float16,float16,0,0.03551999976237615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,128,0,1,fp8,fp8,0,0.03331200033426285
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,1,128,0,1,float16,fp8,0,0.03566399961709976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,128,0,1,float16,float16,0,0.035536001125971474
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,128,0,1,float16,fp8,0,0.03587199995915095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,4,2,128,0,1,fp8,fp8,0,0.03552533437808355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,128,0,1,float16,float16,0,0.02514133354028066
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,128,0,1,float16,fp8,0,0.025226667523384094
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,4,128,0,1,fp8,fp8,0,0.023205332458019257
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,128,0,1,float16,float16,0,0.025583999852339428
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,128,0,1,float16,fp8,0,0.025237334271272022
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,1,128,0,1,fp8,fp8,0,0.023610666394233704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,128,0,1,float16,float16,0,0.025429333249727886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,128,0,1,float16,fp8,0,0.02568000058333079
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,4,2,128,0,1,fp8,fp8,0,0.024933333198229473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,128,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,128,0,1,float16,fp8,0,0.018960000326236088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,4,128,0,1,fp8,fp8,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,128,0,1,float16,float16,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,128,0,1,float16,fp8,0,0.01959466685851415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,1,128,0,1,fp8,fp8,0,0.018144000321626663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,4,2,128,0,1,fp8,fp8,0,0.018837332725524902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,4,128,0,1,fp8,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,128,0,1,fp8,fp8,0,0.01703466723362605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,1,128,0,1,float16,fp8,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,128,0,1,float16,float16,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,4,2,128,0,1,fp8,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,128,0,1,float16,float16,0,0.016074666132529575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,128,0,1,float16,fp8,0,0.015583999454975128
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,4,128,0,1,fp8,fp8,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,128,0,1,float16,float16,0,0.015466666469971338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,128,0,1,float16,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,1,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,128,0,1,float16,fp8,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,4,2,128,0,1,fp8,fp8,0,0.014858666807413101
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,128,0,1,float16,float16,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,128,0,1,float16,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,4,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,128,0,1,float16,float16,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,128,0,1,float16,fp8,0,0.016832000265518825
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,1,128,0,1,fp8,fp8,0,0.015333333363135656
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,128,0,1,float16,float16,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,128,0,1,float16,fp8,0,0.016789333273967106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,4,2,128,0,1,fp8,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,128,0,1,float16,float16,0,0.015125333021084467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,128,0,1,float16,fp8,0,0.016805333395799
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,4,128,0,1,fp8,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,128,0,1,float16,float16,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,1,128,0,1,fp8,fp8,0,0.01540800059835116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,128,0,1,float16,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,4,2,128,0,1,fp8,fp8,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,128,0,1,float16,fp8,0,0.01516266663869222
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,128,0,1,float16,float16,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,4,128,0,1,fp8,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,128,0,1,float16,fp8,0,0.01573866605758667
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,1,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,128,0,1,float16,float16,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,4,2,128,0,1,fp8,fp8,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,128,0,1,float16,float16,0,0.014975999792416891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,128,0,1,float16,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,4,128,0,1,fp8,fp8,0,0.01602666700879733
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,128,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,1,128,0,1,fp8,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,128,0,1,float16,float16,0,0.014773332824309668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,128,0,1,float16,fp8,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,4,2,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,0,0.03141333411137263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,1,128,0,1,fp8,fp8,0,0.03127466638882955
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,0,0.03145066648721695
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,0,0.03365333378314972
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,4,2,128,0,1,fp8,fp8,0,0.03147733211517334
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,0,0.02475200096766154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,0,0.02404266595840454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,4,128,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,0,0.023754666248957317
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,0,0.02345066765944163
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,1,128,0,1,fp8,fp8,0,0.023178666830062866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,0,0.02348800003528595
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,0,0.025013332565625507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,128,0,1,fp8,fp8,0,0.019237333287795384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,4,2,128,0,1,fp8,fp8,0,0.023056000471115112
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,0,0.02107200026512146
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,0,0.021274665991465252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,1,128,0,1,fp8,fp8,0,0.02057066683967908
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,0,0.021055998901526134
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,0,0.021541332205136616
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,4,2,128,0,1,fp8,fp8,0,0.01932799940307935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,0,0.01708799973130226
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,4,128,0,1,fp8,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,0,0.017024000485738117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,1,128,0,1,fp8,fp8,0,0.015765332927306492
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,0,0.01701333373785019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,4,2,128,0,1,fp8,fp8,0,0.015253332753976187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,0,0.017344000438849132
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,4,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,1,128,0,1,fp8,fp8,0,0.016250666230916977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,0,0.015541333705186844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,4,2,128,0,1,fp8,fp8,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,0,0.015226667126019796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,4,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,1,128,0,1,fp8,fp8,0,0.01661866654952367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,0,0.016421332955360413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,4,2,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,0,0.015397333850463232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,0,0.015306666493415833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,4,128,0,1,fp8,fp8,0,0.015568000574906668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,0,0.015008000036080679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,1,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,0,0.015386667102575302
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,4,2,128,0,1,fp8,fp8,0,0.014938666174809137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,4,128,0,1,fp8,fp8,0,0.01669866715868314
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,1,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,4,2,128,0,1,fp8,fp8,0,0.016864000509182613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,128,0,1,fp8,fp8,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,1,128,0,1,fp8,fp8,0,0.014959999670584997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,4,2,128,0,1,fp8,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,128,0,1,fp8,fp8,0,0.6543999910354614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,float16,0,0.7720692952473959
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16384,2,1,128,0,1,float16,fp8,0,0.7645440101623535
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,float16,0,0.45227734247843426
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,128,0,1,fp8,fp8,0,0.3849653402964274
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,2,128,0,1,float16,fp8,0,0.45070934295654297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,128,0,1,fp8,fp8,0,0.37774932384490967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,fp8,0,0.42685333887736004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,2,1,128,0,1,float16,float16,0,0.431818683942159
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,float16,0,0.27319467067718506
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,128,0,1,fp8,fp8,0,0.2435093323389689
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,2,128,0,1,float16,fp8,0,0.2714879910151164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,float16,0,0.26629332701365155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,128,0,1,fp8,fp8,0,0.23825599749883017
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,2,1,128,0,1,float16,fp8,0,0.26816533009211224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,float16,0,0.1832533280054728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,128,0,1,float16,fp8,0,0.18352532386779785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,2,128,0,1,fp8,fp8,0,0.16496533155441284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,float16,0,0.18111467361450195
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,128,0,1,float16,fp8,0,0.1814346710840861
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,2,1,128,0,1,fp8,fp8,0,0.1627679963906606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,float16,0,0.4742240111033122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,128,0,1,float16,fp8,0,0.48263998826344806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,float16,0,0.2961919903755188
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,12288,2,1,128,0,1,fp8,fp8,0,0.41281600793202716
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,128,0,1,float16,fp8,0,0.2918613354365031
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,2,128,0,1,fp8,fp8,0,0.25379733244578045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,float16,0,0.2718293269475301
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,128,0,1,float16,fp8,0,0.2733546694119771
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,float16,0,0.17517866690953574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,2,1,128,0,1,fp8,fp8,0,0.24381866057713827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,128,0,1,float16,fp8,0,0.17855999867121378
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,2,128,0,1,fp8,fp8,0,0.16098666191101074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,float16,0,0.17557867368062338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,128,0,1,float16,fp8,0,0.17520533005396524
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,2,1,128,0,1,fp8,fp8,0,0.15677332878112793
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,float16,0,0.14104533195495605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,128,0,1,float16,fp8,0,0.14108799894650778
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,2,128,0,1,fp8,fp8,0,0.12800533572832742
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,float16,0,0.13819199800491333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,128,0,1,float16,fp8,0,0.1399679978688558
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,2,1,128,0,1,fp8,fp8,0,0.1267039974530538
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,float16,0,0.3582719961802165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,128,0,1,float16,fp8,0,0.3587253491083781
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,10240,2,1,128,0,1,fp8,fp8,0,0.31540799140930176
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,float16,0,0.22431999444961548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,128,0,1,float16,fp8,0,0.22231467564900717
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,2,128,0,1,fp8,fp8,0,0.1994933287302653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,float16,0,0.2109973430633545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,128,0,1,float16,fp8,0,0.21328532695770264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,2,1,128,0,1,fp8,fp8,0,0.19003732999165854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,float16,0,0.13382400075594583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,128,0,1,float16,fp8,0,0.1363093356291453
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,2,128,0,1,fp8,fp8,0,0.12160533666610718
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,float16,0,0.13222933808962503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,128,0,1,float16,fp8,0,0.13449600338935852
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,2,1,128,0,1,fp8,fp8,0,0.11960533261299133
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,float16,0,0.11978666981061299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,128,0,1,float16,fp8,0,0.11999467015266418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,float16,0,0.11958932876586914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,2,128,0,1,fp8,fp8,0,0.10873599847157796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,128,0,1,float16,fp8,0,0.11954133709271748
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,2,1,128,0,1,fp8,fp8,0,0.10826667149861653
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,float16,0,0.45892266432444256
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,128,0,1,float16,fp8,0,0.46160535017649335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,8192,2,1,128,0,1,fp8,fp8,0,0.39485331376393634
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,float16,0,0.2759360074996948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,128,0,1,float16,fp8,0,0.2759893337885539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,2,128,0,1,fp8,fp8,0,0.23685866594314575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,float16,0,0.2553760011990865
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,128,0,1,fp8,fp8,0,0.22823466857274374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,2,1,128,0,1,float16,fp8,0,0.2548533280690511
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,float16,0,0.1588053305943807
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,128,0,1,float16,fp8,0,0.16010133425394693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,2,128,0,1,fp8,fp8,0,0.14621866742769876
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,float16,0,0.1545973320802053
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,128,0,1,float16,fp8,0,0.15702399611473083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,2,1,128,0,1,fp8,fp8,0,0.14147200187047324
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,float16,0,0.1053600013256073
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,128,0,1,float16,fp8,0,0.10546666383743286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,2,128,0,1,fp8,fp8,0,0.0969546635945638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,float16,0,0.10382399956385295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,128,0,1,float16,fp8,0,0.10347732901573181
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,2,1,128,0,1,fp8,fp8,0,0.09363733728726704
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,float16,0,0.09917333722114563
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,128,0,1,float16,fp8,0,0.09946667154630025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,2,128,0,1,fp8,fp8,0,0.09012266993522644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,float16,0,0.0990826686223348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,128,0,1,float16,fp8,0,0.09872532884279887
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,2,1,128,0,1,fp8,fp8,0,0.09011733531951904
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,fp8,0,0.3004213372866313
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,float16,0,0.1851466695467631
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,128,0,1,float16,float16,0,0.2996053298314412
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,6144,2,1,128,0,1,fp8,fp8,0,0.26202666759490967
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,128,0,1,float16,fp8,0,0.18413867553075156
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,2,128,0,1,fp8,fp8,0,0.16089066863059998
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,float16,0,0.16595199704170227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,128,0,1,float16,fp8,0,0.16318933169047037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,fp8,0,0.10891733566919963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,128,0,1,float16,float16,0,0.10604799787203471
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,2,128,0,1,fp8,fp8,0,0.09833066662152608
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,2,1,128,0,1,fp8,fp8,0,0.1520960032939911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,float16,0,0.10632000366846721
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,128,0,1,float16,fp8,0,0.10614400108655293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,2,1,128,0,1,fp8,fp8,0,0.09563733140627544
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,float16,0,0.08143466711044312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,128,0,1,float16,fp8,0,0.0827946662902832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,2,128,0,1,fp8,fp8,0,0.07522133489449818
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,fp8,0,0.08285866677761078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,128,0,1,float16,float16,0,0.08153066535790761
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,2,1,128,0,1,fp8,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,float16,0,0.07890133559703827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,128,0,1,float16,fp8,0,0.07896000146865845
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,2,128,0,1,fp8,fp8,0,0.07102933526039124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,float16,0,0.07666666805744171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,128,0,1,float16,fp8,0,0.07876266539096832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,2,1,128,0,1,fp8,fp8,0,0.07063999772071838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,float16,0,0.31539734204610187
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,float16,0,0.19024000565210977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,128,0,1,float16,fp8,0,0.3177280028661092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,4096,2,1,128,0,1,fp8,fp8,0,0.27854400873184204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,128,0,1,float16,fp8,0,0.1911733349164327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,float16,0,0.17036799589792886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,2,128,0,1,fp8,fp8,0,0.16591466466585794
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,128,0,1,float16,fp8,0,0.17036267121632895
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,float16,0,0.10364799698193868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,128,0,1,float16,fp8,0,0.10466133554776509
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,2,1,128,0,1,fp8,fp8,0,0.15587733189264932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,2,128,0,1,fp8,fp8,0,0.09741866588592529
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,float16,0,0.099973330895106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,128,0,1,fp8,fp8,0,0.09124799569447835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,float16,0,0.06558399895826976
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,2,1,128,0,1,float16,fp8,0,0.10025599598884583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,128,0,1,float16,fp8,0,0.06638399759928386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,2,128,0,1,fp8,fp8,0,0.062208001812299095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,float16,0,0.06615466872851054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,128,0,1,float16,fp8,0,0.06640533109505971
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,2,1,128,0,1,fp8,fp8,0,0.060496002435684204
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,float16,0,0.05991466840108236
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,128,0,1,float16,fp8,0,0.06046933432420095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,2,128,0,1,fp8,fp8,0,0.05612266560395559
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,float16,0,0.060319999853769936
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,128,0,1,float16,fp8,0,0.06043200194835663
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,2,1,128,0,1,fp8,fp8,0,0.055488000313440956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,float16,0,0.05830933153629303
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,128,0,1,float16,fp8,0,0.058117335041364036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,2,128,0,1,fp8,fp8,0,0.053727999329566956
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,float16,0,0.05728533367315928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,128,0,1,float16,fp8,0,0.05701333284378052
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,2,1,128,0,1,fp8,fp8,0,0.05211733281612396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,float16,0,0.21236266692479452
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,128,0,1,float16,fp8,0,0.21107733249664307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,3072,2,1,128,0,1,fp8,fp8,0,0.1871253252029419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,float16,0,0.12690666317939758
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,128,0,1,float16,fp8,0,0.1267573336760203
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,2,128,0,1,fp8,fp8,0,0.11617599924405415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,float16,0,0.11101333300272624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,128,0,1,fp8,fp8,0,0.10589866836865743
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,2,1,128,0,1,float16,fp8,0,0.11224533120791118
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,float16,0,0.0722453345855077
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,128,0,1,float16,fp8,0,0.07500799993673961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,2,128,0,1,fp8,fp8,0,0.06709333260854085
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,float16,0,0.0710346649090449
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,float16,0,0.054101333022117615
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,128,0,1,float16,fp8,0,0.07249600191911061
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,2,1,128,0,1,fp8,fp8,0,0.06458666423956554
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,128,0,1,float16,fp8,0,0.05455466608206431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,float16,0,0.052341332038243614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,2,128,0,1,fp8,fp8,0,0.04983466863632202
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,128,0,1,float16,fp8,0,0.05215999980767568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,float16,0,0.050197333097457886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,2,1,128,0,1,fp8,fp8,0,0.048341333866119385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,128,0,1,float16,fp8,0,0.049551998575528465
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,2,128,0,1,fp8,fp8,0,0.046165332198143005
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,float16,0,0.05012799799442291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,128,0,1,float16,fp8,0,0.04986133178075155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,2,1,128,0,1,fp8,fp8,0,0.0459199994802475
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,float16,0,0.04780266682306925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,128,0,1,float16,fp8,0,0.04814933240413666
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,2,128,0,1,fp8,fp8,0,0.042319998145103455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,float16,0,0.047775998711586
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,128,0,1,float16,fp8,0,0.04798933366934458
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,2,1,128,0,1,fp8,fp8,0,0.042768001556396484
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,float16,0,0.23577600717544556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,128,0,1,float16,fp8,0,0.23673067490259805
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,2048,2,1,128,0,1,fp8,fp8,0,0.21237866083780924
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,float16,0,0.14046933253606161
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,128,0,1,float16,fp8,0,0.14071466525395712
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,2,128,0,1,fp8,fp8,0,0.1269760032494863
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,float16,0,0.12693867087364197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,128,0,1,float16,fp8,0,0.1273973286151886
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,2,1,128,0,1,fp8,fp8,0,0.11754133303960164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,fp8,0,0.07610133290290833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,128,0,1,float16,float16,0,0.07627200086911519
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,2,128,0,1,fp8,fp8,0,0.0726453314224879
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,float16,0,0.07063999772071838
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,128,0,1,float16,fp8,0,0.07278400162855785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,2,1,128,0,1,fp8,fp8,0,0.06622933348019917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,float16,0,0.04785066843032837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,128,0,1,float16,fp8,0,0.04991999765237173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,2,128,0,1,fp8,fp8,0,0.04534933467706045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,float16,0,0.048058668772379555
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,128,0,1,float16,fp8,0,0.04766400158405304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,float16,0,0.04218666752179464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,2,1,128,0,1,fp8,fp8,0,0.04359466830889384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,128,0,1,float16,fp8,0,0.041797334949175514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,2,128,0,1,fp8,fp8,0,0.04011200120051702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,float16,0,0.04098666707674662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,128,0,1,float16,fp8,0,0.04181333382924398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,2,1,128,0,1,fp8,fp8,0,0.038191998998324074
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,float16,0,0.037776000797748566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,128,0,1,fp8,fp8,0,0.03542399903138479
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,2,128,0,1,float16,fp8,0,0.04001600046952566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,float16,0,0.03781333317359289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,128,0,1,float16,fp8,0,0.038245332737763725
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,2,1,128,0,1,fp8,fp8,0,0.035818666219711304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,fp8,0,0.037578667203585304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,128,0,1,fp8,fp8,0,0.03402666747570038
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,2,128,0,1,float16,float16,0,0.03805333375930786
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,128,0,1,float16,fp8,0,0.03777066618204117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,2,1,128,0,1,fp8,fp8,0,0.03363733241955439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,fp8,0,0.16300266981124878
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,float16,0,0.09611733754475911
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,128,0,1,float16,float16,0,0.16132799784342447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1536,2,1,128,0,1,fp8,fp8,0,0.1478506624698639
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,128,0,1,float16,fp8,0,0.09955199559529622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,2,128,0,1,fp8,fp8,0,0.09156800309816997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,fp8,0,0.08705066641171773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,128,0,1,float16,float16,0,0.0849120020866394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,fp8,0,0.05794133245944977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,128,0,1,float16,float16,0,0.056330665946006775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,2,1,128,0,1,fp8,fp8,0,0.08197866876920064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,2,128,0,1,fp8,fp8,0,0.054602667689323425
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,float16,0,0.0543093333641688
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,128,0,1,float16,fp8,0,0.05646933118502299
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,2,1,128,0,1,fp8,fp8,0,0.05147199829419454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,float16,0,0.040287998815377556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,128,0,1,float16,fp8,0,0.040591999888420105
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,2,128,0,1,fp8,fp8,0,0.03791466603676478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,float16,0,0.039861333866914116
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,128,0,1,float16,fp8,0,0.040250666439533234
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,2,1,128,0,1,fp8,fp8,0,0.036720000207424164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,float16,0,0.03565866748491923
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,128,0,1,float16,fp8,0,0.03569599986076355
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,2,128,0,1,fp8,fp8,0,0.03149333347876867
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,float16,0,0.033914667864640556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,128,0,1,float16,fp8,0,0.03442133218050003
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,2,1,128,0,1,fp8,fp8,0,0.031498665610949196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,float16,0,0.03164800008138021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,128,0,1,float16,fp8,0,0.03170666595300039
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,2,128,0,1,fp8,fp8,0,0.029738667110602062
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,float16,0,0.03161599983771642
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,128,0,1,float16,fp8,0,0.03196800003449122
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,2,1,128,0,1,fp8,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,float16,0,0.03143466760714849
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,128,0,1,float16,fp8,0,0.03173333406448364
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,2,128,0,1,fp8,fp8,0,0.02924799919128418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,128,0,1,fp8,fp8,0,0.02940266579389572
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,2,1,128,0,1,float16,fp8,0,0.03236799935499827
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,float16,0,0.1646506687005361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,128,0,1,float16,fp8,0,0.1622773309548696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1024,2,1,128,0,1,fp8,fp8,0,0.163290669520696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,float16,0,0.10246400038401286
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,128,0,1,float16,fp8,0,0.09790399670600891
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,2,128,0,1,fp8,fp8,0,0.09719467163085938
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,float16,0,0.08988266189893086
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,128,0,1,float16,fp8,0,0.08742400010426839
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,2,1,128,0,1,fp8,fp8,0,0.08954667051633199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,float16,0,0.05485333502292633
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,128,0,1,float16,fp8,0,0.053904001911481224
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,2,128,0,1,fp8,fp8,0,0.05411200225353241
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,float16,0,0.051781331499417625
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,128,0,1,float16,fp8,0,0.05233600238958994
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,2,1,128,0,1,fp8,fp8,0,0.04868799944718679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,float16,0,0.033717334270477295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,128,0,1,float16,fp8,0,0.035877334574858345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,2,128,0,1,fp8,fp8,0,0.033728001018365227
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,float16,0,0.03383466601371765
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,128,0,1,float16,fp8,0,0.033674667278925575
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,2,1,128,0,1,fp8,fp8,0,0.03364799916744232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,float16,0,0.02956266701221466
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,128,0,1,float16,fp8,0,0.029882666965325672
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,2,128,0,1,fp8,fp8,0,0.028330666323502857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,float16,0,0.029290666182835896
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,128,0,1,float16,fp8,0,0.029685333371162415
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,float16,0,0.02752000093460083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,2,1,128,0,1,fp8,fp8,0,0.027855999767780304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,128,0,1,float16,fp8,0,0.02758399893840154
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,float16,0,0.02712533374627431
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,2,128,0,1,fp8,fp8,0,0.025775998830795288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,128,0,1,float16,fp8,0,0.02771199991305669
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,2,1,128,0,1,fp8,fp8,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,float16,0,0.027274665733178455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,128,0,1,float16,fp8,0,0.027221334477265675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,2,128,0,1,fp8,fp8,0,0.025413334369659424
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,float16,0,0.027317332724730175
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,128,0,1,float16,fp8,0,0.025301332275072735
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,2,1,128,0,1,fp8,fp8,0,0.025285333395004272
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,float16,0,0.025701334079106648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,128,0,1,float16,fp8,0,0.025407999753952026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,2,128,0,1,fp8,fp8,0,0.024304000039895374
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,float16,0,0.025536000728607178
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,128,0,1,float16,fp8,0,0.02516799916823705
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,2,1,128,0,1,fp8,fp8,0,0.024314666787783306
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,128,0,1,float16,float16,0,0.14124266306559244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,128,0,1,float16,fp8,0,0.13703999916712442
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,512,2,1,128,0,1,fp8,fp8,0,0.14430399735768637
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,128,0,1,float16,float16,0,0.08757866422335307
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,128,0,1,float16,fp8,0,0.08534399668375652
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,2,128,0,1,fp8,fp8,0,0.08515200018882751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,128,0,1,float16,float16,0,0.07634133100509644
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,128,0,1,float16,fp8,0,0.0751146674156189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,128,0,1,float16,float16,0,0.04622933268547058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,2,1,128,0,1,fp8,fp8,0,0.08079466720422109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,128,0,1,float16,fp8,0,0.046037331223487854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,2,128,0,1,fp8,fp8,0,0.04942933221658071
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,128,0,1,float16,float16,0,0.04397333165009817
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,128,0,1,float16,fp8,0,0.044069334864616394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,2,1,128,0,1,fp8,fp8,0,0.04380266865094503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,128,0,1,float16,float16,0,0.02998399982849757
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,128,0,1,float16,fp8,0,0.030794667700926464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,2,128,0,1,fp8,fp8,0,0.029504001140594482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,128,0,1,float16,float16,0,0.029877332349618275
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,128,0,1,float16,fp8,0,0.030778666337331135
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,2,1,128,0,1,fp8,fp8,0,0.02977066735426585
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,128,0,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,128,0,1,float16,fp8,0,0.025759999950726826
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,2,128,0,1,fp8,fp8,0,0.0234400009115537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,128,0,1,float16,float16,0,0.025290665527184803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,128,0,1,float16,fp8,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,128,0,1,float16,float16,0,0.023402666052182514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,2,1,128,0,1,fp8,fp8,0,0.02493866781393687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,128,0,1,float16,fp8,0,0.02199466774861018
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,2,128,0,1,fp8,fp8,0,0.021231998999913532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,128,0,1,float16,float16,0,0.02312533309062322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,128,0,1,float16,fp8,0,0.021920000513394673
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,2,1,128,0,1,fp8,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,128,0,1,float16,float16,0,0.02139200021823247
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,128,0,1,float16,fp8,0,0.021141332884629566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,2,128,0,1,fp8,fp8,0,0.019248000035683315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,128,0,1,float16,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,2,1,128,0,1,fp8,fp8,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,128,0,1,float16,float16,0,0.021333334346612293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,128,0,1,float16,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,128,0,1,float16,float16,0,0.02160000056028366
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,2,128,0,1,fp8,fp8,0,0.021087999145189922
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,128,0,1,float16,fp8,0,0.020975999534130096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,2,1,128,0,1,fp8,fp8,0,0.021173333128293354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,128,0,1,float16,float16,0,0.02145066608985265
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,2,128,0,1,fp8,fp8,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,128,0,1,float16,float16,0,0.02110933264096578
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,128,0,1,float16,fp8,0,0.021151999632517498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,2,1,128,0,1,fp8,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,128,0,1,float16,float16,0,0.07158933579921722
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,128,0,1,fp8,fp8,0,0.0769760012626648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,256,2,1,128,0,1,float16,fp8,0,0.07074133555094402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,128,0,1,float16,float16,0,0.04178666571776072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,128,0,1,float16,fp8,0,0.041562666495641075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,2,128,0,1,fp8,fp8,0,0.044213334719340004
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,128,0,1,float16,float16,0,0.04205333193143209
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,128,0,1,float16,fp8,0,0.041989331444104515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,2,1,128,0,1,fp8,fp8,0,0.0377866675456365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,128,0,1,float16,float16,0,0.027765333652496338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,128,0,1,float16,fp8,0,0.027744000156720478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,2,128,0,1,fp8,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,128,0,1,float16,float16,0,0.027488000690937042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,128,0,1,float16,fp8,0,0.02757333219051361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,2,1,128,0,1,fp8,fp8,0,0.025205334027608235
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,128,0,1,float16,float16,0,0.023557332654794056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,128,0,1,float16,fp8,0,0.023562667270501454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,2,128,0,1,fp8,fp8,0,0.021488000949223835
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,128,0,1,float16,float16,0,0.023317334552605946
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,128,0,1,float16,fp8,0,0.022783999641736347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,2,1,128,0,1,fp8,fp8,0,0.021168000996112823
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,128,0,1,float16,float16,0,0.021061333517233532
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,128,0,1,float16,fp8,0,0.019744000087181728
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,2,128,0,1,fp8,fp8,0,0.019306667149066925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,128,0,1,float16,fp8,0,0.0195573332409064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,128,0,1,float16,float16,0,0.020986666282018025
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,2,1,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,128,0,1,float16,float16,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,128,0,1,float16,fp8,0,0.0194560003777345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,2,128,0,1,fp8,fp8,0,0.017605333278576534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,128,0,1,float16,float16,0,0.019519999623298645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,2,1,128,0,1,fp8,fp8,0,0.018800000349680584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,128,0,1,float16,float16,0,0.01937599976857503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,128,0,1,float16,fp8,0,0.0191040001809597
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,2,128,0,1,fp8,fp8,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,128,0,1,float16,float16,0,0.019194666296243668
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,128,0,1,float16,fp8,0,0.019365333020687103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,2,1,128,0,1,fp8,fp8,0,0.017477333545684814
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,128,0,1,float16,float16,0,0.018885333091020584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,128,0,1,float16,fp8,0,0.01950399950146675
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,2,128,0,1,fp8,fp8,0,0.0174346665541331
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,128,0,1,float16,float16,0,0.01905599981546402
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,128,0,1,float16,fp8,0,0.019098666807015736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,128,0,1,float16,float16,0,0.019402666638294857
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,2,1,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,128,0,1,float16,fp8,0,0.019029332945744198
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,2,128,0,1,fp8,fp8,0,0.017973333597183228
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,128,0,1,float16,float16,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,128,0,1,float16,fp8,0,0.01924266666173935
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,2,1,128,0,1,fp8,fp8,0,0.018351999421914417
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,128,0,1,float16,float16,0,0.04388799766699473
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,128,0,1,float16,fp8,0,0.0436160018046697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,128,2,1,128,0,1,fp8,fp8,0,0.04124266654253006
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,128,0,1,float16,float16,0,0.027386667827765148
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,128,0,1,float16,fp8,0,0.029296000798543293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,2,128,0,1,fp8,fp8,0,0.027749332288901012
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,128,0,1,float16,float16,0,0.029114666084448498
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,128,0,1,fp8,fp8,0,0.027509334186712902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,2,1,128,0,1,float16,fp8,0,0.029205332199732464
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,128,0,1,float16,float16,0,0.021503999829292297
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,128,0,1,float16,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,2,128,0,1,fp8,fp8,0,0.02165866643190384
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,128,0,1,float16,float16,0,0.02179199953873952
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,128,0,1,float16,fp8,0,0.02128533273935318
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,2,1,128,0,1,fp8,fp8,0,0.021130666136741638
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,128,0,1,float16,fp8,0,0.019023999571800232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,128,0,1,fp8,fp8,0,0.018730666488409042
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,2,128,0,1,float16,float16,0,0.019658666104078293
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,128,0,1,float16,float16,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,2,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,128,0,1,float16,fp8,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,128,0,1,float16,float16,0,0.018325333793958027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,2,128,0,1,fp8,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,128,0,1,float16,float16,0,0.01743999992807706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,128,0,1,float16,fp8,0,0.0170666662355264
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,2,1,128,0,1,fp8,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,128,0,1,float16,fp8,0,0.017301333447297413
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,2,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,128,0,1,float16,float16,0,0.01693333312869072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,128,0,1,fp8,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,2,1,128,0,1,float16,fp8,0,0.017279999951521557
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,128,0,1,float16,fp8,0,0.01740266631046931
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,2,128,0,1,fp8,fp8,0,0.016010666886965435
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,128,0,1,float16,fp8,0,0.017514667163292568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,2,1,128,0,1,fp8,fp8,0,0.015135999768972397
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,128,0,1,float16,float16,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,2,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,128,0,1,float16,fp8,0,0.01687466725707054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,128,0,1,float16,float16,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,2,1,128,0,1,fp8,fp8,0,0.015834666788578033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,128,0,1,float16,fp8,0,0.01722666621208191
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,2,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,2,1,128,0,1,fp8,fp8,0,0.016415999581416447
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,128,0,1,float16,float16,0,0.033600000043710075
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,128,0,1,float16,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,64,2,1,128,0,1,fp8,fp8,0,0.03313600023587545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,128,0,1,float16,float16,0,0.023605334262053173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,128,0,1,float16,fp8,0,0.02383466561635335
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,2,128,0,1,fp8,fp8,0,0.023408000667889912
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,128,0,1,float16,float16,0,0.02332799881696701
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,128,0,1,float16,fp8,0,0.023215999205907185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,2,1,128,0,1,fp8,fp8,0,0.02329600105683009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,128,0,1,float16,float16,0,0.017375999440749485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,128,0,1,float16,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,2,128,0,1,fp8,fp8,0,0.017360000560681026
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,128,0,1,float16,float16,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,128,0,1,float16,fp8,0,0.019205333044131596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,2,1,128,0,1,fp8,fp8,0,0.017450666675964992
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,128,0,1,float16,float16,0,0.01695466662446658
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,128,0,1,float16,fp8,0,0.016762666404247284
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,2,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,128,0,1,float16,float16,0,0.017152000218629837
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,2,1,128,0,1,float16,fp8,0,0.01621866722901662
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,2,128,0,1,fp8,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,128,0,1,float16,float16,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,128,0,1,float16,fp8,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,2,1,128,0,1,fp8,fp8,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,128,0,1,float16,float16,0,0.015210667004187902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,128,0,1,float16,fp8,0,0.016783999900023144
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,2,128,0,1,fp8,fp8,0,0.015018666783968607
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,128,0,1,float16,float16,0,0.01509333277742068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,128,0,1,float16,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,2,1,128,0,1,fp8,fp8,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,128,0,1,float16,float16,0,0.01728533332546552
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,128,0,1,float16,fp8,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,2,128,0,1,fp8,fp8,0,0.01607999950647354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,128,0,1,float16,float16,0,0.016858667135238647
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,128,0,1,float16,fp8,0,0.01721599946419398
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,2,1,128,0,1,fp8,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,128,0,1,float16,float16,0,0.01714666684468587
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,128,0,1,float16,fp8,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,2,128,0,1,fp8,fp8,0,0.01505600040157636
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,128,0,1,float16,float16,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,128,0,1,float16,fp8,0,0.015477333217859268
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,2,1,128,0,1,fp8,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,2,128,0,1,fp8,fp8,0,0.014783999572197596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,2,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,128,0,1,float16,float16,0,0.02735999971628189
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,128,0,1,fp8,fp8,0,0.027456000447273254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,32,2,1,128,0,1,float16,fp8,0,0.02777066578467687
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,128,0,1,float16,float16,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,128,0,1,float16,fp8,0,0.02102400114138921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,2,128,0,1,fp8,fp8,0,0.021194666624069214
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,128,0,1,float16,float16,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,128,0,1,float16,fp8,0,0.020970667401949566
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,2,1,128,0,1,fp8,fp8,0,0.021002667645613354
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,128,0,1,float16,float16,0,0.017103999853134155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,128,0,1,float16,fp8,0,0.017312000195185345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,2,128,0,1,fp8,fp8,0,0.017130666722853977
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,2,1,128,0,1,fp8,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,128,0,1,float16,float16,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,128,0,1,float16,fp8,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,2,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,128,0,1,fp8,fp8,0,0.015301333119471868
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,2,1,128,0,1,float16,float16,0,0.01588800052801768
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,128,0,1,float16,float16,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,128,0,1,float16,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,2,128,0,1,fp8,fp8,0,0.01692266638080279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,128,0,1,float16,float16,0,0.01581866666674614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,2,1,128,0,1,fp8,fp8,0,0.016048000504573185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,128,0,1,float16,float16,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,128,0,1,float16,fp8,0,0.01532799998919169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,128,0,1,float16,float16,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,2,128,0,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,2,1,128,0,1,fp8,fp8,0,0.01543466622630755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,128,0,1,float16,float16,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,128,0,1,float16,fp8,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,2,128,0,1,fp8,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,128,0,1,float16,float16,0,0.014943999548753103
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,128,0,1,float16,fp8,0,0.016127999871969223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,2,1,128,0,1,fp8,fp8,0,0.016997333616018295
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,128,0,1,float16,float16,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,128,0,1,fp8,fp8,0,0.015930666277805965
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,2,128,0,1,float16,fp8,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,128,0,1,float16,float16,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,128,0,1,float16,fp8,0,0.016879999389251072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,128,0,1,float16,float16,0,0.014949332922697067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,2,1,128,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,128,0,1,float16,fp8,0,0.015754666179418564
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,2,128,0,1,fp8,fp8,0,0.015061333775520325
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,128,0,1,float16,float16,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,128,0,1,float16,fp8,0,0.016229332735141117
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,2,1,128,0,1,fp8,fp8,0,0.014848000059525171
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,128,0,1,float16,fp8,0,0.025360000630219776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,128,0,1,float16,float16,0,0.025562666356563568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,16,2,1,128,0,1,fp8,fp8,0,0.023599999646345775
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,128,0,1,float16,float16,0,0.019333332777023315
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,2,128,0,1,float16,fp8,0,0.019167999426523846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,128,0,1,float16,float16,0,0.019093333433071773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,128,0,1,float16,fp8,0,0.019253333409627277
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,2,1,128,0,1,fp8,fp8,0,0.01918399954835574
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,128,0,1,float16,float16,0,0.015087999403476715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,128,0,1,float16,fp8,0,0.01587733378012975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,128,0,1,float16,float16,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,2,128,0,1,fp8,fp8,0,0.015530666957298914
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,128,0,1,float16,fp8,0,0.015520000209410986
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,2,1,128,0,1,fp8,fp8,0,0.01716800034046173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,128,0,1,float16,float16,0,0.015829333414634068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,128,0,1,float16,fp8,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,2,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,128,0,1,float16,float16,0,0.01571200042963028
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,128,0,1,fp8,fp8,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,2,1,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,128,0,1,float16,float16,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,128,0,1,float16,fp8,0,0.0169813334941864
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,2,128,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,128,0,1,float16,float16,0,0.015013333410024643
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,128,0,1,float16,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,2,1,128,0,1,fp8,fp8,0,0.01545599972208341
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,128,0,1,float16,float16,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,128,0,1,float16,fp8,0,0.016869333883126576
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,2,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,128,0,1,float16,fp8,0,0.017231999586025875
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,2,1,128,0,1,fp8,fp8,0,0.015034666905800501
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,128,0,1,float16,fp8,0,0.015317333241303762
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,2,128,0,1,fp8,fp8,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,128,0,1,float16,float16,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,128,0,1,float16,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,2,1,128,0,1,fp8,fp8,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,128,0,1,float16,fp8,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,2,128,0,1,fp8,fp8,0,0.015285332997639975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,128,0,1,float16,float16,0,0.015493333339691162
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,128,0,1,float16,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,2,1,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,128,0,1,float16,fp8,0,0.015146666516860327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,2,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,128,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,2,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,0,0.02513599892457326
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,256,1,2,1,128,0,1,fp8,fp8,0,0.023370665808518726
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,0,0.019733333339293797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,0,0.019050666441520054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,2,128,0,1,fp8,fp8,0,0.01903466631968816
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,0,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,0,0.021210665504137676
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,2,1,128,0,1,fp8,fp8,0,0.01926933353145917
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,0,0.017008000363906223
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,2,128,0,1,fp8,fp8,0,0.01735466718673706
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,0,0.01732800031701724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,0,0.01718933383623759
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,2,1,128,0,1,fp8,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,0,0.015370666980743408
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,0,0.01634666696190834
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,1,128,0,1,fp8,fp8,0,0.01590399940808614
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,2,2,128,0,1,fp8,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,0,0.01552533358335495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,2,128,0,1,fp8,fp8,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,0,0.01523200049996376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,0,0.015696000307798386
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,2,1,128,0,1,fp8,fp8,0,0.015119999647140503
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,0,0.015439999600251516
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,2,128,0,1,fp8,fp8,0,0.01551466683546702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,0,0.015423999478419622
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,0,0.016645333419243496
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,0,0.014890667051076889
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,0,0.01586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,2,1,128,0,1,fp8,fp8,0,0.01492799942692121
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,0,0.01562133307258288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,2,128,0,1,fp8,fp8,0,0.014864000181357065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,2,1,128,0,1,fp8,fp8,0,0.016106666376193363
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,2,128,0,1,fp8,fp8,0,0.015418666104475657
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,2,1,128,0,1,fp8,fp8,0,0.01515199989080429
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,2,128,0,1,fp8,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,0,0.015114666273196539
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,2,1,128,0,1,fp8,fp8,0,0.01522133375207583
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,float16,0,0.26632533470789593
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,128,0,1,float16,fp8,0,0.2702666719754537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16384,1,1,128,0,1,fp8,fp8,0,0.2390986680984497
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,float16,0,0.17796266078948975
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,128,0,1,float16,fp8,0,0.17907200256983438
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16384,1,1,128,0,1,fp8,fp8,0,0.15915200114250183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,float16,0,0.17286400000254312
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,128,0,1,float16,fp8,0,0.17275200287501016
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16384,1,1,128,0,1,fp8,fp8,0,0.15242666999499002
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,float16,0,0.1733013391494751
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,128,0,1,fp8,fp8,0,0.1566986640294393
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,12288,1,1,128,0,1,float16,fp8,0,0.1759893298149109
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,float16,0,0.13647466897964478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,128,0,1,float16,fp8,0,0.13801599542299905
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,12288,1,1,128,0,1,fp8,fp8,0,0.12452800075213115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,float16,0,0.13409066200256348
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,128,0,1,float16,fp8,0,0.13346133629480997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,12288,1,1,128,0,1,fp8,fp8,0,0.11972266435623169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,float16,0,0.13190399607022604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,128,0,1,fp8,fp8,0,0.12000532944997151
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,10240,1,1,128,0,1,float16,fp8,0,0.13288000226020813
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,float16,0,0.11754133303960164
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,128,0,1,fp8,fp8,0,0.10586133599281311
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,10240,1,1,128,0,1,float16,fp8,0,0.11784533659617107
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,float16,0,0.11346667011578877
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,128,0,1,float16,fp8,0,0.11372799674669902
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,10240,1,1,128,0,1,fp8,fp8,0,0.10147200028101604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,float16,0,0.1590933303038279
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,128,0,1,float16,fp8,0,0.1586666703224182
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,8192,1,1,128,0,1,fp8,fp8,0,0.14679466684659323
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,float16,0,0.10346666971842448
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,128,0,1,float16,fp8,0,0.10341333349545796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,8192,1,1,128,0,1,fp8,fp8,0,0.09505066275596619
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,float16,0,0.09733333190282185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,128,0,1,float16,fp8,0,0.09731733798980713
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,8192,1,1,128,0,1,fp8,fp8,0,0.08807999889055888
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,float16,0,0.09416533509890239
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,128,0,1,float16,fp8,0,0.09518933296203613
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,8192,1,1,128,0,1,fp8,fp8,0,0.0849226713180542
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,float16,0,0.10627733667691548
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,128,0,1,float16,fp8,0,0.10817066828409831
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,6144,1,1,128,0,1,fp8,fp8,0,0.09827199578285217
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,fp8,0,0.08084266881148021
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,128,0,1,float16,float16,0,0.08207466701666515
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,6144,1,1,128,0,1,fp8,fp8,0,0.07513066629568736
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,float16,0,0.07705066601435344
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,128,0,1,float16,fp8,0,0.07688533266385396
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,6144,1,1,128,0,1,fp8,fp8,0,0.0713866651058197
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,float16,0,0.07468266785144806
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,128,0,1,float16,fp8,0,0.07657599945863088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,6144,1,1,128,0,1,fp8,fp8,0,0.06797333558400472
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,fp8,0,0.10374400019645691
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,128,0,1,fp8,fp8,0,0.09893866380055745
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,4096,1,1,128,0,1,float16,float16,0,0.1049066682656606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,float16,0,0.06650666892528534
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,128,0,1,float16,fp8,0,0.06635199983914693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,4096,1,1,128,0,1,fp8,fp8,0,0.06228266656398773
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,fp8,0,0.060346667965253196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,128,0,1,float16,float16,0,0.06066666543483734
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,4096,1,1,128,0,1,fp8,fp8,0,0.05575466652711233
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,fp8,0,0.056202664971351624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,128,0,1,float16,float16,0,0.05726400017738342
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,4096,1,1,128,0,1,fp8,fp8,0,0.05219733218352
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,float16,0,0.05435733497142792
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,128,0,1,float16,fp8,0,0.05613866448402405
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,4096,1,1,128,0,1,fp8,fp8,0,0.04971200227737427
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,float16,0,0.07254933317502339
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,float16,0,0.0525493323802948
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,128,0,1,fp8,fp8,0,0.06964266796906789
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,3072,1,1,128,0,1,float16,fp8,0,0.07459733386834462
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,128,0,1,float16,fp8,0,0.05445333321889242
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,float16,0,0.04817600051561991
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,3072,1,1,128,0,1,fp8,fp8,0,0.05004266897837321
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,128,0,1,float16,fp8,0,0.050111999114354454
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,3072,1,1,128,0,1,fp8,fp8,0,0.04524800181388855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,float16,0,0.046021332343419395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,128,0,1,float16,fp8,0,0.04586133360862732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,3072,1,1,128,0,1,fp8,fp8,0,0.04186133543650309
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,float16,0,0.04602666695912679
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,128,0,1,fp8,fp8,0,0.040362666050593056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,3072,1,1,128,0,1,float16,fp8,0,0.046240001916885376
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,float16,0,0.07657599945863088
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,128,0,1,float16,fp8,0,0.07692799965540568
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,2048,1,1,128,0,1,fp8,fp8,0,0.07284266750017802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,float16,0,0.0476693312327067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,128,0,1,float16,fp8,0,0.04841599861780802
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,2048,1,1,128,0,1,fp8,fp8,0,0.04562133550643921
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,float16,0,0.04166933397452036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,128,0,1,float16,fp8,0,0.042064001162846885
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,2048,1,1,128,0,1,fp8,fp8,0,0.03977599988381068
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,float16,0,0.03764266769091288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,128,0,1,float16,fp8,0,0.037903999288876854
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,2048,1,1,128,0,1,fp8,fp8,0,0.036090667049090065
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,float16,0,0.036805334190527596
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,128,0,1,float16,fp8,0,0.03570133447647095
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,2048,1,1,128,0,1,fp8,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,float16,0,0.035936000446478523
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,128,0,1,float16,fp8,0,0.035418666899204254
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,2048,1,1,128,0,1,fp8,fp8,0,0.03346666693687439
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,float16,0,0.05654400090376536
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,128,0,1,float16,fp8,0,0.05808533231417338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1536,1,1,128,0,1,fp8,fp8,0,0.0544053316116333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,float16,0,0.03982933362325033
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,128,0,1,float16,fp8,0,0.039477333426475525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1536,1,1,128,0,1,fp8,fp8,0,0.0377813329299291
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,float16,0,0.03395200024048487
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,128,0,1,float16,fp8,0,0.03578133384386698
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1536,1,1,128,0,1,fp8,fp8,0,0.03350399931271871
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,float16,0,0.032485333581765495
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,128,0,1,float16,fp8,0,0.033413333197434746
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1536,1,1,128,0,1,fp8,fp8,0,0.029648000995318096
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,float16,0,0.03142400085926056
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,128,0,1,float16,fp8,0,0.03178133318821589
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1536,1,1,128,0,1,fp8,fp8,0,0.02918400118748347
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,float16,0,0.031514666974544525
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,128,0,1,float16,fp8,0,0.03169599920511246
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1536,1,1,128,0,1,fp8,fp8,0,0.029418667157491047
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,fp8,0,0.053317333261171974
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,128,0,1,float16,float16,0,0.05413866539796194
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1024,1,1,128,0,1,fp8,fp8,0,0.05615466833114624
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,float16,0,0.033573334415753685
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,128,0,1,float16,fp8,0,0.0355679988861084
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1024,1,1,128,0,1,fp8,fp8,0,0.03345066557327906
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,float16,0,0.029482667644818623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,128,0,1,float16,fp8,0,0.030165334542592365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1024,1,1,128,0,1,fp8,fp8,0,0.029557332396507263
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,float16,0,0.027232001225153606
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,128,0,1,float16,fp8,0,0.027285332481066387
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1024,1,1,128,0,1,fp8,fp8,0,0.025557334224383037
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,float16,0,0.025242666403452556
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,128,0,1,float16,fp8,0,0.025637333591779072
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1024,1,1,128,0,1,fp8,fp8,0,0.02349333216746648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,float16,0,0.025487999121348064
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1024,1,1,128,0,1,fp8,fp8,0,0.02463999887307485
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,float16,0,0.025674665967623394
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,128,0,1,float16,fp8,0,0.025258667767047882
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1024,1,1,128,0,1,fp8,fp8,0,0.025066666305065155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,128,0,1,float16,float16,0,0.04607999821503957
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,128,0,1,float16,fp8,0,0.04574400186538696
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,512,1,1,128,0,1,fp8,fp8,0,0.04797866443792979
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,128,0,1,float16,float16,0,0.029440000653266907
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,128,0,1,float16,fp8,0,0.02942933390537898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,512,1,1,128,0,1,fp8,fp8,0,0.02905600021282832
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,128,0,1,float16,float16,0,0.025455998877684276
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,128,0,1,float16,fp8,0,0.025546667476495106
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,512,1,1,128,0,1,fp8,fp8,0,0.023376000424226124
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,128,0,1,float16,float16,0,0.021322667598724365
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,128,0,1,float16,fp8,0,0.022997332115968067
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,512,1,1,128,0,1,fp8,fp8,0,0.021344001094500225
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,128,0,1,float16,float16,0,0.02146666745344798
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,128,0,1,float16,fp8,0,0.021189334491888683
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,512,1,1,128,0,1,fp8,fp8,0,0.02091199904680252
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,128,0,1,float16,float16,0,0.021205333371957142
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,128,0,1,float16,fp8,0,0.021402666966120403
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,512,1,1,128,0,1,fp8,fp8,0,0.02077866718173027
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,128,0,1,float16,fp8,0,0.021317332983016968
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,128,0,1,fp8,fp8,0,0.019274666905403137
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,512,1,1,128,0,1,float16,float16,0,0.0215786670645078
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,128,0,1,float16,float16,0,0.021221332252025604
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,128,0,1,float16,fp8,0,0.021520001192887623
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,512,1,1,128,0,1,fp8,fp8,0,0.01911466692884763
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,128,0,1,float16,float16,0,0.02123733361562093
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,128,0,1,float16,fp8,0,0.027866666515668232
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,128,0,1,float16,float16,0,0.02784000088771184
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,256,1,1,128,0,1,fp8,fp8,0,0.027615999182065327
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,128,0,1,float16,fp8,0,0.02327466756105423
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,256,1,1,128,0,1,fp8,fp8,0,0.021674667795499165
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,128,0,1,float16,float16,0,0.019962667177120846
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,128,0,1,float16,fp8,0,0.02107733239730199
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,128,0,1,float16,float16,0,0.019296000401178997
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,256,1,1,128,0,1,fp8,fp8,0,0.019141333798567455
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,128,0,1,fp8,fp8,0,0.019146667172511418
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,256,1,1,128,0,1,float16,fp8,0,0.01942933350801468
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,128,0,1,float16,float16,0,0.018901333212852478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,128,0,1,fp8,fp8,0,0.01738133281469345
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,256,1,1,128,0,1,float16,fp8,0,0.018986667195955913
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,128,0,1,float16,float16,0,0.01961600035429001
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,128,0,1,float16,fp8,0,0.019648000597953796
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,256,1,1,128,0,1,fp8,fp8,0,0.017071999609470367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,128,0,1,float16,float16,0,0.019386666516462963
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,128,0,1,float16,fp8,0,0.019685332973798115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,256,1,1,128,0,1,fp8,fp8,0,0.01746133342385292
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,128,0,1,float16,float16,0,0.019466667125622433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,128,0,1,float16,fp8,0,0.019343999524911244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,256,1,1,128,0,1,fp8,fp8,0,0.017077332983414333
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,128,0,1,float16,float16,0,0.02143466720978419
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,128,0,1,float16,fp8,0,0.02109333376089732
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,128,1,1,128,0,1,fp8,fp8,0,0.021136000752449036
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,128,0,1,float16,float16,0,0.01775466650724411
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,128,0,1,float16,fp8,0,0.01720533271630605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,128,1,1,128,0,1,fp8,fp8,0,0.01725333308180173
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,128,0,1,float16,float16,0,0.01710933322707812
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,128,0,1,float16,fp8,0,0.01749333366751671
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,128,1,1,128,0,1,fp8,fp8,0,0.015082667271296183
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,128,0,1,float16,float16,0,0.017338667064905167
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,128,0,1,float16,fp8,0,0.017210666090250015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,128,1,1,128,0,1,fp8,fp8,0,0.015066667149464289
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,128,0,1,float16,float16,0,0.017322666943073273
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,128,1,1,128,0,1,fp8,fp8,0,0.015040000279744467
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,128,0,1,float16,float16,0,0.015141333142916361
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,128,1,1,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,128,0,1,float16,float16,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,128,0,1,float16,fp8,0,0.015173333386580149
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,128,1,1,128,0,1,fp8,fp8,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,128,0,1,float16,float16,0,0.01525866612792015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,128,0,1,float16,fp8,0,0.015263999501864115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,128,1,1,128,0,1,fp8,fp8,0,0.015189333508412043
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,128,0,1,float16,float16,0,0.017509333789348602
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,128,0,1,float16,fp8,0,0.01945066700379054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,64,1,1,128,0,1,fp8,fp8,0,0.018138666947682697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,128,0,1,float16,float16,0,0.01711999997496605
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,128,0,1,float16,fp8,0,0.017018667111794155
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,64,1,1,128,0,1,fp8,fp8,0,0.01695999999841054
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,128,0,1,float16,float16,0,0.017125333348910015
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,128,0,1,float16,fp8,0,0.0162773331006368
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,64,1,1,128,0,1,fp8,fp8,0,0.014981333166360855
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,128,0,1,float16,float16,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,128,0,1,float16,fp8,0,0.015562667200962702
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,64,1,1,128,0,1,fp8,fp8,0,0.015050667027632395
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,128,0,1,float16,float16,0,0.01709866647919019
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,128,0,1,float16,fp8,0,0.015178666760524115
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,64,1,1,128,0,1,fp8,fp8,0,0.01526933287580808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,128,0,1,float16,float16,0,0.015295999745527903
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,128,0,1,float16,fp8,0,0.01498666654030482
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,64,1,1,128,0,1,fp8,fp8,0,0.01533866673707962
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,128,0,1,float16,float16,0,0.015311999867359797
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,128,0,1,float16,fp8,0,0.015594666202863058
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,64,1,1,128,0,1,fp8,fp8,0,0.015279999623696009
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,128,0,1,float16,float16,0,0.015290666371583939
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,128,0,1,float16,fp8,0,0.01613866661985715
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,64,1,1,128,0,1,fp8,fp8,0,0.015109332899252573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,128,0,1,float16,float16,0,0.016906666258970898
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,128,0,1,float16,fp8,0,0.017173333714405697
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,32,1,1,128,0,1,fp8,fp8,0,0.015360000232855478
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,128,0,1,float16,float16,0,0.01706133286158244
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,128,0,1,float16,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,32,1,1,128,0,1,fp8,fp8,0,0.014997333288192749
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,128,0,1,float16,float16,0,0.016901332885026932
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,128,0,1,float16,float16,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,128,0,1,float16,fp8,0,0.016965333372354507
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,32,1,1,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,128,0,1,float16,fp8,0,0.015200000256299973
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,32,1,1,128,0,1,fp8,fp8,0,0.015402667224407196
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,128,0,1,float16,fp8,0,0.017029333859682083
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,32,1,1,128,0,1,fp8,fp8,0,0.015626666446526844
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,128,0,1,float16,float16,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,128,0,1,float16,fp8,0,0.015935999651749928
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,32,1,1,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,128,0,1,float16,float16,0,0.015024000157912573
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,128,0,1,float16,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,32,1,1,128,0,1,fp8,fp8,0,0.015072000523408255
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,128,0,1,float16,float16,0,0.014885333677132925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,128,0,1,float16,fp8,0,0.015237333873907724
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,32,1,1,128,0,1,fp8,fp8,0,0.014991999914248785
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,128,0,1,float16,float16,0,0.015029333531856537
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,128,0,1,float16,fp8,0,0.01659199967980385
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,16,1,1,128,0,1,fp8,fp8,0,0.015216000378131866
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,128,0,1,float16,float16,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,128,0,1,float16,fp8,0,0.015247999380032221
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,16,1,1,128,0,1,fp8,fp8,0,0.015130666395028433
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,128,0,1,float16,fp8,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,128,0,1,float16,float16,0,0.014815999815861383
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,16,1,1,128,0,1,fp8,fp8,0,0.015205333630243937
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,128,0,1,float16,float16,0,0.015194666882356008
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,128,0,1,float16,fp8,0,0.015546667079130808
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,128,0,1,float16,float16,0,0.015098666151364645
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,16,1,1,128,0,1,fp8,fp8,0,0.015392000476519266
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,128,0,1,float16,fp8,0,0.015365333606799444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,128,0,1,float16,float16,0,0.015450666348139444
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,16,1,1,128,0,1,fp8,fp8,0,0.015471999843915304
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,128,0,1,float16,fp8,0,0.015168000012636185
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,16,1,1,128,0,1,fp8,fp8,0,0.015274666249752045
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,128,0,1,float16,float16,0,0.015504000087579092
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,128,0,1,fp8,fp8,0,0.014842666685581207
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,16,1,1,128,0,1,float16,fp8,0,0.015413332730531693
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,128,0,1,float16,fp8,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,16,1,1,128,0,1,fp8,fp8,0,0.016186666985352833
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,0,0.017045332739750545
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,0,0.01739199956258138
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,0,0.017157333592573803
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,128,1,1,1,128,0,1,fp8,fp8,0,0.017637333522240322
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,0,0.016943999876578648
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,64,1,1,1,128,0,1,fp8,fp8,0,0.016986666868130367
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,0,0.015344000111023584
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,0,0.014906667172908783
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,32,1,1,1,128,0,1,fp8,fp8,0,0.015381333728631338
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,0,0.015077333897352219
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,0,0.015642666568358738
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,16,1,1,1,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,0,0.014965333044528961
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,128,0,1,fp8,fp8,0,0.014970666418472925
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,0,0.015354666858911514
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,0,0.015103999525308609
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,0,0.01534933348496755
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,4,1,1,1,128,0,1,fp8,fp8,0,0.01575999955336253
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,0,0.014874666929244995
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,0,0.01524266724785169
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,0,0.01504533365368843
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,2,1,1,1,128,0,1,fp8,fp8,0,0.015637333194414776
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,0,0.01553600033124288
TRTLLM,1.2.0rc6.post3,NVIDIA GB300,context_attention,torch_flow,1,1,1,1,128,0,1,fp8,fp8,0,0.015034666905800501
